In [78]:
import numpy as np
import pandas as pd
from pylab import *
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
In [2]:
df = pd.read_csv('data/adjunct.csv')
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6929 entries, 0 to 6928
Data columns (total 35 columns):
department_id                    6929 non-null int64
payaverage                       6929 non-null int64
paytype                          2593 non-null float64
user_supplied_department_name    6719 non-null object
course_type                      1471 non-null float64
contract_type                    1724 non-null object
period_term                      4517 non-null float64
period_year                      4517 non-null float64
has_contract                     0 non-null float64
has_retirement                   6224 non-null float64
has_health_insurance             6249 non-null float64
has_governance                   6202 non-null float64
has_union                        5797 non-null float64
union_name                       4348 non-null object
is_school_rep                    6929 non-null int64
course_syllabus                  4503 non-null float64
office_space                     4429 non-null float64
student_evals                    3857 non-null float64
has_advanced_degree              4532 non-null float64
paytype_credits                  566 non-null float64
paytype_class_size               34 non-null float64
paytype_hours                    216 non-null float64
paytype_weeks                    216 non-null float64
paytype_courses                  111 non-null float64
department_type_id               6929 non-null int64
display_name                     6929 non-null object
department_type_name             6929 non-null object
college_class                    6929 non-null object
college_carnegie                 6929 non-null object
college_unit_id                  6929 non-null int64
college_state                    6929 non-null object
college_id                       6929 non-null int64
college_hbcu                     0 non-null float64
college_flagship                 0 non-null float64
college_name                     6929 non-null object
dtypes: float64(20), int64(6), object(9)

Replace had_advanced_degree with meaningful labels.

In [69]:
degree_map = {0: 'None', 1: 'MA/MS', 2: 'PhD', 3: 'JD', 4: 'MD'}
df.has_advanced_degree = df.has_advanced_degree.replace(degree_map)
In [34]:
axes = df.payaverage.hist(bins=math.sqrt(2000), by=df.has_union, normed=True)
axes[0].set_title("Not Unionized")
axes[1].set_title("Unionized")
Out[34]:
<matplotlib.text.Text at 0x10dde8a50>
In [36]:
df.course_type.unique()
Out[36]:
array([  0.,   1.,  nan])
In [33]:
axes = df.payaverage.hist(bins=math.sqrt(2000), by=df.course_type, normed=True)
In [70]:
axes = df.payaverage.hist(bins=math.sqrt(1000), by=df.has_advanced_degree, normed=True)
In [71]:
df.groupby('has_advanced_degree').payaverage.describe().unstack()
Out[71]:
count mean std min 25% 50% 75% max
has_advanced_degree
JD 62 3620.854839 2647.832339 1200 2200.00 3000.0 3787.50 18000
MA/MS 2700 2796.370370 1393.114004 300 1950.00 2500.0 3300.00 20000
MD 6 2946.833333 1535.190466 1386 1735.00 2591.5 4083.25 5082
None 200 3145.580000 1667.177476 930 1993.75 2725.0 3753.75 9945
PhD 1564 3543.102302 1724.795824 450 2398.75 3066.0 4481.25 15000

5 rows × 8 columns

In [72]:
df.boxplot('payaverage', by='has_advanced_degree')
Out[72]:
<matplotlib.axes.AxesSubplot at 0x10a1f3810>
In [73]:
 
In [79]:
ma = df[df.has_advanced_degree=='MA/MS']
dr = df[df.has_advanced_degree=='PhD']
nodeg = df[df.has_advanced_degree=='None']

print(stats.ttest_ind(ma.payaverage, dr.payaverage))
print(stats.ttest_ind(ma.payaverage, nodeg.payaverage))
(array(-15.428027721651176), 2.6855989136275075e-52)
(array(-3.3709109788858163), 0.00075901665280692879)

In [84]:
df.boxplot('payaverage', by='college_class', rot=45)
Out[84]:
<matplotlib.axes.AxesSubplot at 0x10d32ff10>
In [82]:
df.college_class.unique()
Out[82]:
array(['4-year public', '2-year public', '4-year private not-for-profit',
       '4-year private for-profit', 'Administrative Unit',
       '2-year private not-for-profit', '2-year private for-profit',
       '2-year Public'], dtype=object)
In [87]:
df[df.has_advanced_degree.isin(['MA/MS','PhD','None'])].groupby(['college_class', 'has_advanced_degree']).payaverage.describe().unstack()
Out[87]:
count mean std min 25% 50% 75% max
college_class has_advanced_degree
2-year Public MA/MS 9 2426.444444 479.757780 1800 1800.00 2600.0 2762.0 2899
PhD 2 2755.000000 205.060967 2610 2682.50 2755.0 2827.5 2900
2-year private for-profit MA/MS 35 1467.371429 491.882763 500 1102.00 1500.0 1850.0 2400
None 2 1258.000000 223.445743 1100 1179.00 1258.0 1337.0 1416
PhD 4 1286.250000 225.291478 1125 1181.25 1200.0 1305.0 1620
2-year private not-for-profit MA/MS 7 1712.142857 590.224937 1050 1392.50 1500.0 1900.0 2850
2-year public MA/MS 812 2369.863300 1135.398714 450 1692.00 2116.5 2700.0 12000
None 68 2580.294118 1597.194500 930 1530.00 2190.0 2994.0 9945
PhD 223 2631.869955 1287.654973 713 1848.00 2340.0 3000.0 9920
4-year private for-profit MA/MS 142 1780.612676 733.041501 666 1300.00 1647.5 2100.0 6000
None 5 1620.000000 433.517589 1200 1250.00 1525.0 1925.0 2200
PhD 75 2073.373333 767.782887 595 1587.50 2000.0 2476.5 5620
4-year private not-for-profit MA/MS 839 3099.891538 1514.441550 300 2100.00 2850.0 3800.0 20000
None 66 3700.212121 1711.597810 1060 2500.00 3112.5 4800.0 8000
PhD 672 3812.418155 1868.524017 450 2506.00 3300.0 4803.0 15000
4-year public MA/MS 845 3152.487574 1387.765105 600 2250.00 3000.0 3610.0 18000
None 59 3369.932203 1480.968531 1000 2400.00 3300.0 3924.5 9360
PhD 580 3802.272414 1568.430001 800 2500.00 3500.0 4825.0 12200
Administrative Unit MA/MS 11 2107.454545 459.555299 1600 1747.50 2100.0 2265.5 3156
PhD 8 2635.500000 1053.929653 1200 1800.00 2550.0 3550.0 3984

20 rows × 8 columns

In [113]:
df['public'] = df.college_class.str.contains('public|Public')
df['private'] = df.college_class.str.contains('private')
df['for-profit'] = df.college_class.str.contains(' for-profit') # the leading space ensures it doesn't match "not-for-profit"
df['private not-for-profit'] = (df.private & ~df['for-profit'])
In [150]:
df['2-year'] = df.college_class.str.contains('2')
df['2-year'][df.college_class == 'Administrative Unit'] = np.nan
df['2-year'].value_counts()
Out[150]:
0    5137
1    1751
dtype: int64
In [125]:
df.department_type_name.value_counts()
Out[125]:
English                           1114
Business                           346
Arts                               327
Composition, Rhetoric, Writing     325
History                            296
Not Specified                      288
Humanities                         244
Psychology                         241
General Studies                    238
Liberal Arts                       236
Education                          204
Social Sciences                    204
Communications                     192
Sociology                          188
Biology                            181
...
Environmental Studies and Forestry    22
Public Administration                 22
Area Studies                          19
Cultural and Ethnic Studies           17
Linguistics                           14
Other                                  9
Family and Consumer Science            7
Divinity                               7
Statistics                             4
Archaeology                            4
Landscape Architecture                 3
Librarian                              2
Transportation                         2
Systems Science                        1
Agriculture                            1
Length: 63, dtype: int64

Using the coding listed in the data dictionary:

In [119]:
departments = {
101000: 'General Studies',
102000: 'Liberal Arts',
200000: 'Humanities',
201000: 'History',
202000: 'Linguistics',
203000: 'Literature',
203100: 'Foreign Language',
203200: 'English',
203300: 'Composition, Rhetoric, Writing',
204000: 'Arts',
204100: 'Performing Arts',
204200: 'Visual Arts',
204300: 'Film and Media',
204400: 'Music',
205000: 'Philosophy',
206000: 'Religion',
300000: 'Social Sciences',
301000: 'Anthropology',
302000: 'Archaeology',
303000: 'Area Studies',
304000: 'Cultural and Ethnic Studies',
305000: 'Economics',
306000: 'Gender and Sexuality Studies',
307000: 'Geography',
308000: 'Political Science',
308100: 'International Relations',
309000: 'Psychology',
310000: 'Sociology',
400000: 'Science and Technology',
401000: 'Space Science',
402000: 'Earth Sciences',
403000: 'Biology',
404000: 'Chemistry',
405000: 'Physics',
406000: 'Engineering',
407000: 'Computer Sciences',
408000: 'Mathematics',
409000: 'Statistics',
410000: 'Systems Science',
600000: 'Professions and Applied sciences',
601000: 'Agriculture',
602000: 'Architecture and Design',
602100: 'Landscape Architecture',
603000: 'Business',
603100: 'Accounting',
604000: 'Divinity',
605000: 'Education',
607000: 'Environmental Studies and Forestry',
608000: 'Family and Consumer Science',
609000: 'Health Science',
609100: 'Nursing',
610000: 'Human Physical Performance and Recreation',
611000: 'Communications',
612000: 'Law',
613000: 'Library and Museum Studies',
614000: 'Military Sciences',
615000: 'Public Administration',
616000: 'Social Work',
617000: 'Transportation',
618000: 'Criminal Studies',
888888: 'Not Specified',
999999: 'Other'}
In [194]:
#df['division'] = 
'''
divisions = {
'1': 'General Studies/Liberal Arts',
'2': 'Arts/Humanities',
'3': 'Social Sciences',
'4': 'Science/Technology',
'6': 'Professions/Applied Science',
'8': np.nan,
'9': 'Other'
}
'''
divisions = {
'1': 'Gen/LibA',
'2': 'Arts/Hum',
'3': 'SocSci',
'4': 'Sci/Tech',
'6': 'Prof/ApS',
'8': np.nan,
'9': 'Other'
}

df['division'] = df.department_type_id.apply(lambda x: str(x)[0:1]).replace(divisions)
In [140]:
df.groupby(['public']).payaverage.describe().unstack()
Out[140]:
count mean std min 25% 50% 75% max
public
False 2911 3193.933356 1701.327580 300 2100 2820.0 3900.00 20000
True 4018 2957.375809 1410.249553 450 2000 2567.5 3574.75 18000

2 rows × 8 columns

In [151]:
df.boxplot('payaverage', by='private not-for-profit', rot=45)
df.groupby(['private not-for-profit']).payaverage.describe().unstack()
Out[151]:
count mean std min 25% 50% 75% max
private not-for-profit
False 4425 2859.963164 1397.415906 450 1950.00 2500 3483 18000
True 2504 3404.528355 1718.649341 300 2278.75 3000 4000 20000

2 rows × 8 columns

In [153]:
df.boxplot('payaverage', by='division', rot=90)
df.groupby(['division']).payaverage.describe().unstack()
Out[153]:
count mean std min 25% 50% 75% max
division
Arts/Humanities 3245 3027.400308 1452.950085 450 2001 2700 3700 15500
General Studies/Liberal Arts 474 2681.419831 1318.727846 300 1800 2400 3200 10500
Other 9 2656.888889 1209.167838 1138 1895 2475 3324 5000
Professions/Applied Science 1203 3158.650873 1757.174617 450 2100 2800 3800 20000
Science/Technology 691 3239.274964 1677.514657 713 2100 2800 4000 12000
Social Sciences 1019 3156.970559 1582.704235 508 2100 2800 3800 12575

6 rows × 8 columns

In [154]:
import statsmodels.formula.api as sm
In [155]:
from pandas.stats.api import ols
In [195]:
pd.concat([data, pd.get_dummies(data.division, prefix="div")])
Out[195]:
2-year college_carnegie college_class college_flagship college_hbcu college_id college_name college_state college_unit_id contract_type course_syllabus course_type degree_JD degree_MA/MS degree_MD degree_None degree_PhD degree_nan department_id department_type_id
1809 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 737 American University District of Columbia 131159 NaN 2 NaN 0 1 0 0 0 0 198 611000 ...
1830 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 538 San Jose State University California 122755 NaN NaN NaN 0 0 0 0 1 0 1905 611000 ...
1920 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 737 American University District of Columbia 131159 NaN 2 NaN 0 0 0 1 0 0 198 611000 ...
1921 0 Research Universities--very high research acti... 4-year public NaN NaN 3340 University of South Carolina at Columbia South Carolina 218663 NaN 3 NaN 0 1 0 0 0 0 1985 611000 ...
1922 0 Research Universities--high research activity 4-year public NaN NaN 2840 Kent State University Ohio 203517 NaN 1 NaN 0 1 0 0 0 0 1986 611100 ...
1923 0 Research Universities--high research activity 4-year public NaN NaN 2840 Kent State University Ohio 203517 NaN 2 NaN 0 1 0 0 0 0 1986 611100 ...
1924 0 Research Universities--very high research acti... 4-year private not-for-profit NaN NaN 2351 Columbia University New York 190150 NaN 2 NaN 0 1 0 0 0 0 1987 611100 ...
1926 0 Research Universities--very high research acti... 4-year public NaN NaN 3340 University of South Carolina at Columbia South Carolina 218663 NaN 2 NaN 0 1 0 0 0 0 1985 611000 ...
1928 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 1041 DePaul University Illinois 144740 NaN 3 NaN 0 0 0 1 0 0 1990 204100 ...
1929 0 Research Universities--very high research acti... 4-year private not-for-profit NaN NaN 1027 University of Chicago Illinois 144050 NaN 3 NaN 0 0 0 1 0 0 1991 204100 ...
1930 0 Research Universities--very high research acti... 4-year private not-for-profit NaN NaN 1027 University of Chicago Illinois 144050 NaN 2 NaN 0 0 0 1 0 0 1992 204000 ...
1931 0 Research Universities--very high research acti... 4-year public NaN NaN 2691 University of North Carolina at Chapel Hill North Carolina 199120 NaN 1 NaN 0 0 0 1 0 0 782 611100 ...
1932 0 Research Universities--very high research acti... 4-year public NaN NaN 3832 University of Washington Washington 236948 NaN 2 NaN 0 1 0 0 0 0 1994 611000 ...
1933 0 Baccalaureate Colleges--Arts and Sciences 4-year private not-for-profit NaN NaN 2139 Doane College Nebraska 181020 NaN 3 NaN 0 0 0 0 1 0 792 203200 ...
1934 1 Associates--Public Suburban-serving Single Campus 2-year public NaN NaN 4036 Warren County Community College New Jersey 245625 NaN 1 NaN 0 1 0 0 0 0 1995 200000 ...
1935 0 Research Universities--high research activity 4-year public NaN NaN 15 University of Alabama at Tuscaloosa Alabama 100751 NaN 2 NaN 0 0 0 0 1 0 1996 200000 ...
1936 0 Baccalaureate Colleges--Arts and Sciences 4-year private not-for-profit NaN NaN 3269 Washington and Jefferson College Pennsylvania 216667 NaN 2 NaN 0 1 0 0 0 0 1893 203200 ...
1937 0 Research Universities--very high research acti... 4-year private not-for-profit NaN NaN 3208 University of Pennsylvania Pennsylvania 215062 NaN 3 NaN 0 1 0 0 0 0 1997 203300 ...
1938 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 737 American University District of Columbia 131159 NaN 2 NaN 0 0 0 0 1 0 198 611000 ...
1939 0 Masters Colleges and Universities--medium prog... 4-year private not-for-profit NaN NaN 1327 Upper Iowa University Iowa 154493 NaN 1 NaN 0 0 0 0 1 0 1998 102000 ...
1940 0 Research Universities--high research activity 4-year public NaN NaN 3936 University of Wisconsin at Milwaukee Wisconsin 240453 NaN 2 NaN 0 0 0 0 1 0 1999 203200 ...
1941 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 3585 Texas State University at San Marcos Texas 228459 NaN 3 NaN 0 0 0 0 1 0 2000 203200 ...
1942 1 Associates--Public Suburban-serving Single Campus 2-year public NaN NaN 3352 Tri-County Technical College South Carolina 218885 NaN 0 NaN 0 1 0 0 0 0 2001 203200 ...
1943 0 Research Universities--very high research acti... 4-year private not-for-profit NaN NaN 1123 Northwestern University Illinois 147767 NaN 2 NaN 0 0 0 1 0 0 2002 611100 ...
1944 0 Doctoral and Research Universities 4-year public NaN NaN 1782 Central Michigan University Michigan 169248 NaN 3 NaN 0 0 0 1 0 0 2003 611100 ...
1945 1 Associates--Public Suburban-serving Single Campus 2-year public NaN NaN 3070 Butler County Community College (Pa.) Pennsylvania 211343 NaN 1 NaN 0 1 0 0 0 0 2004 203200 ...
1946 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 3147 Lincoln University (Pa.) Pennsylvania 213598 NaN 2 NaN 0 0 0 1 0 0 2005 611000 ...
1947 0 Masters Colleges and Universities--larger prog... 4-year private not-for-profit NaN NaN 698 University of Hartford Connecticut 129525 NaN 2 NaN 0 1 0 0 0 0 2006 605000 ...
1948 0 Baccalaureate Colleges--Diverse Fields 4-year private not-for-profit NaN NaN 2388 Elmira College New York 190983 NaN 2 NaN 0 1 0 0 0 0 2007 102000 ...
1950 0 Research Universities--high research activity 4-year public NaN NaN 1705 University of Massachusetts at Lowell Massachusetts 166513 NaN NaN NaN 0 1 0 0 0 0 2010 309000 ...
1954 0 Research Universities--high research activity 4-year public NaN NaN 5 Auburn University Alabama 100858 NaN 2 NaN 0 1 0 0 0 0 2012 611100 ...
1955 1 Associates--Public Rural-serving Medium 2-year public NaN NaN 2097 Three Rivers Community College (Mo.) Missouri 179645 NaN 1 NaN 0 0 0 1 0 0 2013 102000 ...
1956 1 Associates--Public Urban-serving Multicampus 2-year public NaN NaN 149 Pima Community College Arizona 105525 NaN 1 NaN 0 1 0 0 0 0 2014 605000 ...
1957 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 1609 University of Maryland University College Maryland 163204 NaN 0 NaN 0 0 0 0 1 0 613 407000 ...
1958 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 3927 University of Wisconsin at La Crosse Wisconsin 240329 NaN 2 NaN 0 0 0 0 1 0 2015 203100 ...
1960 1 Associates--Public Rural-serving Large 2-year public NaN NaN 761 Brevard Community College Florida 132693 NaN 2 NaN 0 0 0 1 0 0 2017 308100 ...
1961 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 3124 Immaculata University Pennsylvania 213011 NaN 3 NaN 0 1 0 0 0 0 2018 600000 ...
1963 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 757 Barry University Florida 132471 NaN 0 NaN 0 0 0 0 1 0 2020 102000 ...
1965 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 3718 James Madison University Virginia 232423 NaN 2 NaN 0 1 0 0 0 0 2023 603000 ...
1966 0 Baccalaureate Colleges--Diverse Fields 4-year private not-for-profit NaN NaN 3092 Delaware Valley College Pennsylvania 211981 NaN 2 NaN 0 1 0 0 0 0 2024 603000 ...
1967 0 Research Universities--high research activity 4-year private not-for-profit NaN NaN 3099 Duquesne University Pennsylvania 212106 NaN 2 NaN 0 1 0 0 0 0 2025 609000 ...
1968 0 Masters Colleges and Universities--larger prog... 4-year private not-for-profit NaN NaN 2589 Touro College (N.Y.) New York 196592 NaN 2 NaN 0 0 0 0 1 0 2026 605000 ...
1969 1 Associates--Public Urban-serving Multicampus 2-year public NaN NaN 2810 Cuyahoga Community College Ohio 202356 NaN 2 NaN 0 1 0 0 0 0 2027 203200 ...
1970 0 Research Universities--very high research acti... 4-year public NaN NaN 3218 University of Pittsburgh main campus Pennsylvania 215293 NaN 2 NaN 0 1 0 0 0 0 2028 605000 ...
1971 0 Associates--Public Urban-serving Multicampus 4-year public NaN NaN 762 Broward College Florida 132709 NaN 2 NaN 0 0 0 0 1 0 1506 309000 ...
1972 0 Research Universities--high research activity 4-year public NaN NaN 3421 University of Memphis Tennessee 220862 NaN 0 NaN 0 0 0 0 1 0 2029 605000 ...
1974 0 Masters Colleges and Universities--smaller pro... 4-year private not-for-profit NaN NaN 1682 Emmanuel College (Mass.) Massachusetts 165671 NaN 1 NaN 0 0 0 0 1 0 527 400000 ...
1975 0 Research Universities--very high research acti... 4-year public NaN NaN 1612 University of Maryland at College Park Maryland 163286 NaN 3 NaN 0 0 0 0 1 0 2031 204400 ...
1976 1 Associates--Public Rural-serving Large 2-year public NaN NaN 4179 NorthWest Arkansas Community College Arkansas 367459 NaN 2 NaN 0 0 0 0 1 0 2032 407000 ...
1977 1 Associates--Public Suburban-serving Multicampus 2-year public NaN NaN 2270 Union County College New Jersey 187198 NaN 0 NaN 0 1 0 0 0 0 2033 200000 ...
1978 0 Doctoral and Research Universities 4-year private not-for-profit NaN NaN 2262 Seton Hall University New Jersey 186584 NaN 2 NaN 0 1 0 0 0 0 2034 201000 ...
1979 0 Associates--Public Suburban-serving Single Campus 4-year public NaN NaN 845 Seminole State College of Florida Florida 137209 NaN 2 NaN 0 1 0 0 0 0 2035 407000 ...
1980 0 Masters Colleges and Universities--larger prog... 4-year private not-for-profit NaN NaN 2704 Pfeiffer University North Carolina 199306 NaN 3 NaN 0 1 0 0 0 0 2036 407000 ...
1981 0 Masters Colleges and Universities--larger prog... 4-year public NaN NaN 2242 Kean University New Jersey 185262 NaN 2 NaN 0 1 0 0 0 0 2037 308000 ...
1982 1 Associates--Public Urban-serving Multicampus 2-year public NaN NaN 2230 Essex County College New Jersey 184481 NaN 1 NaN 0 1 0 0 0 0 2038 201000 ...
1983 0 Research Universities--very high research acti... 4-year private not-for-profit NaN NaN 2355 Cornell University New York 190415 NaN 2 NaN 0 1 0 0 0 0 2039 602000 ...
1984 0 Research Universities--high research activity 4-year public NaN NaN 627 University of Colorado at Denver Colorado 126562 NaN NaN NaN 0 1 0 0 0 0 2040 605000 ...
1985 0 Masters Colleges and Universities--larger prog... 4-year private not-for-profit NaN NaN 3231 Robert Morris University (Pa.) Pennsylvania 215655 NaN 3 NaN 0 1 0 0 0 0 2041 605000 ...
1986 0 Masters Colleges and Universities--larger prog... 4-year private not-for-profit NaN NaN 1750 Simmons College Massachusetts 167783 NaN 3 NaN 0 0 0 0 1 0 2042 613000 ...
1987 0 Research Universities--high research activity 4-year private not-for-profit NaN NaN 3464 Baylor University Texas 223232 NaN 2 NaN 0 0 0 0 1 0 2043 605000 ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

8364 rows × 58 columns

In [202]:
data = pd.concat([df, pd.get_dummies(df.has_advanced_degree, prefix='degree', dummy_na=True)], axis=1)
data = pd.concat([data, pd.get_dummies(data.division, prefix="div")], axis=1)
data.info()
data.filter(like='degree')
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6929 entries, 0 to 6928
Data columns (total 53 columns):
department_id                    6929 non-null int64
payaverage                       6929 non-null int64
paytype                          2593 non-null float64
user_supplied_department_name    6719 non-null object
course_type                      1471 non-null float64
contract_type                    1724 non-null object
period_term                      4517 non-null float64
period_year                      4517 non-null float64
has_contract                     0 non-null float64
has_retirement                   6224 non-null float64
has_health_insurance             6249 non-null float64
has_governance                   6202 non-null float64
has_union                        5797 non-null float64
union_name                       4348 non-null object
is_school_rep                    6929 non-null int64
course_syllabus                  4503 non-null float64
office_space                     4429 non-null float64
student_evals                    3857 non-null float64
has_advanced_degree              4532 non-null object
paytype_credits                  566 non-null float64
paytype_class_size               34 non-null float64
paytype_hours                    216 non-null float64
paytype_weeks                    216 non-null float64
paytype_courses                  111 non-null float64
department_type_id               6929 non-null int64
display_name                     6929 non-null object
department_type_name             6929 non-null object
college_class                    6929 non-null object
college_carnegie                 6929 non-null object
college_unit_id                  6929 non-null int64
college_state                    6929 non-null object
college_id                       6929 non-null int64
college_hbcu                     0 non-null float64
college_flagship                 0 non-null float64
college_name                     6929 non-null object
public                           6929 non-null bool
private                          6929 non-null bool
for-profit                       6929 non-null bool
private not-for-profit           6929 non-null bool
division                         6641 non-null object
2-year                           6888 non-null float64
degree_JD                        6929 non-null float64
degree_MA/MS                     6929 non-null float64
degree_MD                        6929 non-null float64
degree_None                      6929 non-null float64
degree_PhD                       6929 non-null float64
degree_nan                       6929 non-null float64
div_Arts/Hum                     6929 non-null float64
div_Gen/LibA                     6929 non-null float64
div_Other                        6929 non-null float64
div_Prof/ApS                     6929 non-null float64
div_Sci/Tech                     6929 non-null float64
div_SocSci                       6929 non-null float64
dtypes: bool(4), float64(32), int64(6), object(11)
Out[202]:
has_advanced_degree degree_JD degree_MA/MS degree_MD degree_None degree_PhD degree_nan
0 NaN 0 0 0 0 0 1
1 NaN 0 0 0 0 0 1
2 NaN 0 0 0 0 0 1
3 NaN 0 0 0 0 0 1
4 NaN 0 0 0 0 0 1
5 NaN 0 0 0 0 0 1
6 NaN 0 0 0 0 0 1
7 NaN 0 0 0 0 0 1
8 NaN 0 0 0 0 0 1
9 NaN 0 0 0 0 0 1
10 NaN 0 0 0 0 0 1
11 NaN 0 0 0 0 0 1
12 NaN 0 0 0 0 0 1
13 NaN 0 0 0 0 0 1
14 NaN 0 0 0 0 0 1
15 NaN 0 0 0 0 0 1
16 NaN 0 0 0 0 0 1
17 NaN 0 0 0 0 0 1
18 NaN 0 0 0 0 0 1
19 NaN 0 0 0 0 0 1
20 NaN 0 0 0 0 0 1
21 NaN 0 0 0 0 0 1
22 NaN 0 0 0 0 0 1
23 NaN 0 0 0 0 0 1
24 NaN 0 0 0 0 0 1
25 NaN 0 0 0 0 0 1
26 NaN 0 0 0 0 0 1
27 NaN 0 0 0 0 0 1
28 NaN 0 0 0 0 0 1
29 NaN 0 0 0 0 0 1
30 NaN 0 0 0 0 0 1
31 NaN 0 0 0 0 0 1
32 NaN 0 0 0 0 0 1
33 NaN 0 0 0 0 0 1
34 NaN 0 0 0 0 0 1
35 NaN 0 0 0 0 0 1
36 NaN 0 0 0 0 0 1
37 NaN 0 0 0 0 0 1
38 NaN 0 0 0 0 0 1
39 NaN 0 0 0 0 0 1
40 NaN 0 0 0 0 0 1
41 NaN 0 0 0 0 0 1
42 NaN 0 0 0 0 0 1
43 NaN 0 0 0 0 0 1
44 NaN 0 0 0 0 0 1
45 NaN 0 0 0 0 0 1
46 NaN 0 0 0 0 0 1
47 NaN 0 0 0 0 0 1
48 NaN 0 0 0 0 0 1
49 NaN 0 0 0 0 0 1
50 NaN 0 0 0 0 0 1
51 NaN 0 0 0 0 0 1
52 NaN 0 0 0 0 0 1
53 NaN 0 0 0 0 0 1
54 NaN 0 0 0 0 0 1
55 NaN 0 0 0 0 0 1
56 NaN 0 0 0 0 0 1
57 NaN 0 0 0 0 0 1
58 NaN 0 0 0 0 0 1
59 NaN 0 0 0 0 0 1
... ... ... ... ... ... ...

6929 rows × 7 columns

In [199]:
data.division.value_counts()
Out[199]:
Arts/Hum    3245
Prof/ApS    1203
SocSci      1019
Sci/Tech     691
Gen/LibA     474
Other          9
dtype: int64
In [221]:
data['any_health_ins'] = (data.has_health_insurance.isin([1,2,3])).where(
    (data.has_health_insurance.notnull()) & (data.has_health_insurance != 4))
data.filter(like='health')
Out[221]:
has_health_insurance any_health_ins
1809 4 NaN
1830 1 1
1920 0 0
1921 0 0
1922 0 0
1923 0 0
1924 3 1
1926 NaN NaN
1928 4 NaN
1929 4 NaN
1930 0 0
1931 0 0
1932 1 1
1933 0 0
1934 0 0
1935 0 0
1936 0 0
1937 0 0
1938 4 NaN
1939 0 0
1940 1 1
1941 3 1
1942 0 0
1943 0 0
1944 1 1
1945 0 0
1946 0 0
1947 0 0
1948 0 0
1950 NaN NaN
1954 0 0
1955 0 0
1956 0 0
1957 3 1
1958 1 1
1960 0 0
1961 0 0
1963 0 0
1965 0 0
1966 0 0
1967 0 0
1968 0 0
1969 0 0
1970 0 0
1971 0 0
1972 0 0
1974 0 0
1975 NaN NaN
1976 0 0
1977 0 0
1978 0 0
1979 0 0
1980 0 0
1981 0 0
1982 0 0
1983 NaN NaN
1984 NaN NaN
1985 0 0
1986 0 0
1987 0 0
... ...

4182 rows × 2 columns

In [203]:
data = data[(data.has_advanced_degree.isin(['None','MA/MS','PhD'])) & 
            (data.public | data['private not-for-profit']) & 
            (data.division.isin(['Arts/Hum', 'Prof/ApS', 'SocSci', 'Sci/Tech', 'Gen/LibA']))]
            #(data.division.isin(['Arts/Humanities', 'Professions/Applied Science', 'Social Sciences', 
            #                     'Science/Technology', 'General Studies/Liberal Arts']))]
data.filter(like='div_')
Out[203]:
div_Arts/Hum div_Gen/LibA div_Other div_Prof/ApS div_Sci/Tech div_SocSci
1809 0 0 0 1 0 0
1830 0 0 0 1 0 0
1920 0 0 0 1 0 0
1921 0 0 0 1 0 0
1922 0 0 0 1 0 0
1923 0 0 0 1 0 0
1924 0 0 0 1 0 0
1926 0 0 0 1 0 0
1928 1 0 0 0 0 0
1929 1 0 0 0 0 0
1930 1 0 0 0 0 0
1931 0 0 0 1 0 0
1932 0 0 0 1 0 0
1933 1 0 0 0 0 0
1934 1 0 0 0 0 0
1935 1 0 0 0 0 0
1936 1 0 0 0 0 0
1937 1 0 0 0 0 0
1938 0 0 0 1 0 0
1939 0 1 0 0 0 0
1940 1 0 0 0 0 0
1941 1 0 0 0 0 0
1942 1 0 0 0 0 0
1943 0 0 0 1 0 0
1944 0 0 0 1 0 0
1945 1 0 0 0 0 0
1946 0 0 0 1 0 0
1947 0 0 0 1 0 0
1948 0 1 0 0 0 0
1950 0 0 0 0 0 1
1954 0 0 0 1 0 0
1955 0 1 0 0 0 0
1956 0 0 0 1 0 0
1957 0 0 0 0 1 0
1958 1 0 0 0 0 0
1960 0 0 0 0 0 1
1961 0 0 0 1 0 0
1963 0 1 0 0 0 0
1965 0 0 0 1 0 0
1966 0 0 0 1 0 0
1967 0 0 0 1 0 0
1968 0 0 0 1 0 0
1969 1 0 0 0 0 0
1970 0 0 0 1 0 0
1971 0 0 0 0 0 1
1972 0 0 0 1 0 0
1974 0 0 0 0 1 0
1975 1 0 0 0 0 0
1976 0 0 0 0 1 0
1977 1 0 0 0 0 0
1978 1 0 0 0 0 0
1979 0 0 0 0 1 0
1980 0 0 0 0 1 0
1981 0 0 0 0 0 1
1982 1 0 0 0 0 0
1983 0 0 0 1 0 0
1984 0 0 0 1 0 0
1985 0 0 0 1 0 0
1986 0 0 0 1 0 0
1987 0 0 0 1 0 0
... ... ... ... ... ...

4182 rows × 6 columns

In [225]:
y = data.payaverage
log_y = y.apply(np.log)
x = data[['degree_MA/MS', 'degree_PhD', '2-year', 'public', 'has_union', 
          'div_Arts/Hum', 'div_Prof/ApS', 'div_SocSci', 'div_Sci/Tech', 'any_health_ins']]
x2 = data[['degree_MA/MS', 'degree_PhD', '2-year', 'public', 'has_union', 
          'div_Arts/Hum', 'div_Prof/ApS', 'div_SocSci', 'div_Sci/Tech']]
          #'divis_Arts/Humanities', 'divis_Professions/Applied Science', 'divis_Social Sciences', 'divis_Science/Technology']]
x.info()
res = ols(y=y, x=x)
res2 = ols(y=log_y, x=x)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4182 entries, 1809 to 6927
Data columns (total 10 columns):
degree_MA/MS      4182 non-null float64
degree_PhD        4182 non-null float64
2-year            4182 non-null float64
public            4182 non-null bool
has_union         3533 non-null float64
div_Arts/Hum      4182 non-null float64
div_Prof/ApS      4182 non-null float64
div_SocSci        4182 non-null float64
div_Sci/Tech      4182 non-null float64
any_health_ins    3663 non-null float64
dtypes: bool(1), float64(9)
In [223]:
print res

-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <degree_MA/MS> + <degree_PhD> + <2-year> + <public> + <has_union>
             + <div_Arts/Hum> + <div_Prof/ApS> + <div_SocSci> + <div_Sci/Tech>
             + <any_health_ins> + <intercept>

Number of Observations:         3225
Number of Degrees of Freedom:   11

R-squared:         0.1861
Adj R-squared:     0.1836

Rmse:           1357.0738

F-stat (10, 3214):    73.5015, p-value:     0.0000

Degrees of Freedom: model 10, resid 3214

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
  degree_MA/MS  -249.2784   116.1686      -2.15     0.0320  -476.9690   -21.5879
    degree_PhD   270.8295   119.9225       2.26     0.0240    35.7813   505.8776
        2-year  -786.0221    62.2996     -12.62     0.0000  -908.1294  -663.9149
        public  -147.7148    57.5558      -2.57     0.0103  -260.5242   -34.9054
     has_union  -554.4217   377.8007      -1.47     0.1423 -1294.9111   186.0677
--------------------------------------------------------------------------------
  div_Arts/Hum   206.9524   104.4024       1.98     0.0475     2.3237   411.5810
  div_Prof/ApS   266.6175   113.3500       2.35     0.0187    44.4516   488.7835
    div_SocSci   159.1791   116.6479       1.36     0.1725   -69.4508   387.8090
  div_Sci/Tech   345.5963   122.0551       2.83     0.0047   106.3682   584.8244
any_health_ins  1014.7623    59.6721      17.01     0.0000   897.8051  1131.7196
--------------------------------------------------------------------------------
     intercept  3585.8706   400.6223       8.95     0.0000  2800.6508  4371.0903
---------------------------------End of Summary---------------------------------


In [224]:
print res2

-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <degree_MA/MS> + <degree_PhD> + <2-year> + <public> + <has_union>
             + <div_Arts/Hum> + <div_Prof/ApS> + <div_SocSci> + <div_Sci/Tech>
             + <any_health_ins> + <intercept>

Number of Observations:         3225
Number of Degrees of Freedom:   11

R-squared:         0.2038
Adj R-squared:     0.2014

Rmse:              0.3978

F-stat (10, 3214):    82.2899, p-value:     0.0000

Degrees of Freedom: model 10, resid 3214

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
  degree_MA/MS    -0.0522     0.0341      -1.53     0.1254    -0.1189     0.0145
    degree_PhD     0.0956     0.0352       2.72     0.0066     0.0267     0.1645
        2-year    -0.2904     0.0183     -15.91     0.0000    -0.3262    -0.2547
        public    -0.0221     0.0169      -1.31     0.1897    -0.0552     0.0109
     has_union    -0.1075     0.1107      -0.97     0.3316    -0.3246     0.1095
--------------------------------------------------------------------------------
  div_Arts/Hum     0.0776     0.0306       2.54     0.0113     0.0176     0.1376
  div_Prof/ApS     0.0955     0.0332       2.87     0.0041     0.0303     0.1606
    div_SocSci     0.0719     0.0342       2.10     0.0356     0.0049     0.1389
  div_Sci/Tech     0.1216     0.0358       3.40     0.0007     0.0514     0.1917
any_health_ins     0.2805     0.0175      16.03     0.0000     0.2462     0.3147
--------------------------------------------------------------------------------
     intercept     7.9976     0.1174      68.11     0.0000     7.7675     8.2278
---------------------------------End of Summary---------------------------------


In [226]:
ols(y=log_y, x=x2)
Out[226]:

-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <degree_MA/MS> + <degree_PhD> + <2-year> + <public> + <has_union>
             + <div_Arts/Hum> + <div_Prof/ApS> + <div_SocSci> + <div_Sci/Tech>
             + <intercept>

Number of Observations:         3533
Number of Degrees of Freedom:   10

R-squared:         0.1433
Adj R-squared:     0.1411

Rmse:              0.4158

F-stat (9, 3523):    65.4917, p-value:     0.0000

Degrees of Freedom: model 9, resid 3523

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
  degree_MA/MS    -0.0839     0.0339      -2.48     0.0133    -0.1504    -0.0175
    degree_PhD     0.0759     0.0349       2.17     0.0299     0.0074     0.1444
        2-year    -0.3234     0.0182     -17.80     0.0000    -0.3590    -0.2878
        public     0.0326     0.0164       1.99     0.0471     0.0004     0.0648
     has_union    -0.0659     0.0957      -0.69     0.4910    -0.2535     0.1217
--------------------------------------------------------------------------------
  div_Arts/Hum     0.0923     0.0310       2.98     0.0029     0.0316     0.1530
  div_Prof/ApS     0.0996     0.0336       2.96     0.0031     0.0337     0.1654
    div_SocSci     0.0905     0.0344       2.63     0.0086     0.0231     0.1580
  div_Sci/Tech     0.1290     0.0359       3.59     0.0003     0.0586     0.1995
     intercept     8.0078     0.1041      76.94     0.0000     7.8038     8.2118
---------------------------------End of Summary---------------------------------
In [227]:
import statsmodels.formula.api as smf
In [236]:
data.has_advanced_degree.value_counts()
Out[236]:
MA/MS    2512
PhD      1477
None      193
dtype: int64
In [309]:
results = smf.ols("log(payaverage) ~ C(has_advanced_degree, Treatment(reference='None'))" \
                  "*C(division, Treatment(reference='Gen/LibA')) + has_union + public + two_year + C(has_health_insurance)", data=data).fit()
results.summary()
Out[309]:
OLS Regression Results
Dep. Variable: log(payaverage) R-squared: 0.209
Model: OLS Adj. R-squared: 0.204
Method: Least Squares F-statistic: 42.67
Date: Wed, 30 Apr 2014 Prob (F-statistic): 3.55e-155
Time: 17:21:03 Log-Likelihood: -1689.2
No. Observations: 3406 AIC: 3422.
Df Residuals: 3384 BIC: 3557.
Df Model: 21
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 7.6717 0.143 53.748 0.000 7.392 7.952
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS] 0.2110 0.113 1.871 0.061 -0.010 0.432
C(has_advanced_degree, Treatment(reference='None'))[T.PhD] 0.4117 0.118 3.492 0.000 0.181 0.643
C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] 0.3471 0.119 2.923 0.003 0.114 0.580
C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] 0.4846 0.121 3.990 0.000 0.246 0.723
C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] 0.2858 0.130 2.201 0.028 0.031 0.540
C(division, Treatment(reference='Gen/LibA'))[T.SocSci] 0.4744 0.161 2.953 0.003 0.159 0.789
public[T.True] -0.0296 0.017 -1.788 0.074 -0.062 0.003
C(has_health_insurance)[T.1.0] 0.3326 0.026 12.850 0.000 0.282 0.383
C(has_health_insurance)[T.2.0] 0.2882 0.032 9.091 0.000 0.226 0.350
C(has_health_insurance)[T.3.0] 0.2145 0.027 7.889 0.000 0.161 0.268
C(has_health_insurance)[T.4.0] 0.0795 0.031 2.576 0.010 0.019 0.140
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] -0.2731 0.125 -2.187 0.029 -0.518 -0.028
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] -0.3434 0.130 -2.639 0.008 -0.599 -0.088
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] -0.4019 0.129 -3.127 0.002 -0.654 -0.150
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] -0.4614 0.134 -3.431 0.001 -0.725 -0.198
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] -0.1913 0.138 -1.386 0.166 -0.462 0.079
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] -0.1766 0.142 -1.241 0.215 -0.456 0.103
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] -0.4115 0.167 -2.471 0.014 -0.738 -0.085
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] -0.4584 0.170 -2.693 0.007 -0.792 -0.125
has_union -0.0368 0.095 -0.389 0.697 -0.222 0.149
two_year -0.2808 0.018 -15.451 0.000 -0.316 -0.245
Omnibus: 121.189 Durbin-Watson: 1.851
Prob(Omnibus): 0.000 Jarque-Bera (JB): 313.464
Skew: -0.133 Prob(JB): 8.55e-69
Kurtosis: 4.462 Cond. No. 121.
In [306]:
res2 = smf.ols("log(payaverage) ~ has_advanced_degree*(division) + C(two_year)", data=data).fit()
res2.summary()
Out[306]:
OLS Regression Results
Dep. Variable: log(payaverage) R-squared: 0.147
Model: OLS Adj. R-squared: 0.144
Method: Least Squares F-statistic: 47.97
Date: Wed, 30 Apr 2014 Prob (F-statistic): 6.04e-132
Time: 17:17:40 Log-Likelihood: -2284.5
No. Observations: 4182 AIC: 4601.
Df Residuals: 4166 BIC: 4702.
Df Model: 15
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 7.9780 0.013 635.700 0.000 7.953 8.003
has_advanced_degree[T.None] 0.0528 0.052 1.015 0.310 -0.049 0.155
has_advanced_degree[T.PhD] 0.1468 0.021 7.113 0.000 0.106 0.187
division[T.Gen/LibA] -0.0966 0.037 -2.624 0.009 -0.169 -0.024
division[T.Prof/ApS] -0.0030 0.023 -0.131 0.896 -0.048 0.042
division[T.Sci/Tech] -5.09e-05 0.030 -0.002 0.999 -0.060 0.060
division[T.SocSci] -0.0180 0.025 -0.712 0.476 -0.067 0.032
C(two_year)[T.1.0] -0.3035 0.015 -20.138 0.000 -0.333 -0.274
has_advanced_degree[T.None]:division[T.Gen/LibA] -0.1612 0.119 -1.350 0.177 -0.395 0.073
has_advanced_degree[T.PhD]:division[T.Gen/LibA] 0.0722 0.063 1.149 0.251 -0.051 0.195
has_advanced_degree[T.None]:division[T.Prof/ApS] 0.2017 0.079 2.568 0.010 0.048 0.356
has_advanced_degree[T.PhD]:division[T.Prof/ApS] 0.0022 0.039 0.056 0.955 -0.074 0.078
has_advanced_degree[T.None]:division[T.Sci/Tech] -0.0794 0.089 -0.888 0.374 -0.255 0.096
has_advanced_degree[T.PhD]:division[T.Sci/Tech] 0.0914 0.045 2.047 0.041 0.004 0.179
has_advanced_degree[T.None]:division[T.SocSci] 0.1447 0.134 1.084 0.278 -0.117 0.406
has_advanced_degree[T.PhD]:division[T.SocSci] 0.0382 0.038 0.994 0.320 -0.037 0.114
Omnibus: 113.848 Durbin-Watson: 1.833
Prob(Omnibus): 0.000 Jarque-Bera (JB): 281.411
Skew: 0.014 Prob(JB): 7.81e-62
Kurtosis: 4.270 Cond. No. 27.1
In [265]:
data.rename(columns={'2_year':'two_year'}, inplace=True)
In [262]:
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4182 entries, 1809 to 6927
Data columns (total 54 columns):
department_id                    4182 non-null int64
payaverage                       4182 non-null int64
paytype                          662 non-null float64
user_supplied_department_name    4177 non-null object
course_type                      0 non-null float64
contract_type                    0 non-null object
period_term                      4121 non-null float64
period_year                      4121 non-null float64
has_contract                     0 non-null float64
has_retirement                   3878 non-null float64
has_health_insurance             3914 non-null float64
has_governance                   3897 non-null float64
has_union                        3533 non-null float64
union_name                       3515 non-null object
is_school_rep                    4182 non-null int64
course_syllabus                  4083 non-null float64
office_space                     4003 non-null float64
student_evals                    3483 non-null float64
has_advanced_degree              4182 non-null object
paytype_credits                  486 non-null float64
paytype_class_size               18 non-null float64
paytype_hours                    168 non-null float64
paytype_weeks                    168 non-null float64
paytype_courses                  103 non-null float64
department_type_id               4182 non-null int64
display_name                     4182 non-null object
department_type_name             4182 non-null object
college_class                    4182 non-null object
college_carnegie                 4182 non-null object
college_unit_id                  4182 non-null int64
college_state                    4182 non-null object
college_id                       4182 non-null int64
college_hbcu                     0 non-null float64
college_flagship                 0 non-null float64
college_name                     4182 non-null object
public                           4182 non-null bool
private                          4182 non-null bool
for-profit                       4182 non-null bool
private not-for-profit           4182 non-null bool
division                         4182 non-null object
2_year                           4182 non-null float64
degree_JD                        4182 non-null float64
degree_MA/MS                     4182 non-null float64
degree_MD                        4182 non-null float64
degree_None                      4182 non-null float64
degree_PhD                       4182 non-null float64
degree_nan                       4182 non-null float64
div_Arts/Hum                     4182 non-null float64
div_Gen/LibA                     4182 non-null float64
div_Other                        4182 non-null float64
div_Prof/ApS                     4182 non-null float64
div_Sci/Tech                     4182 non-null float64
div_SocSci                       4182 non-null float64
any_health_ins                   3663 non-null float64
dtypes: bool(4), float64(33), int64(6), object(11)
In []:
result.t_test()
In [303]:
data.payaverage.apply(np.log).plot(kind='kde')
Out[303]:
<matplotlib.axes.AxesSubplot at 0x113de5b90>
In [291]:
results.params.index
Out[291]:
Index([u'Intercept', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]', u'C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum]', u'C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS]', u'C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech]', u'C(division, Treatment(reference='Gen/LibA'))[T.SocSci]', u'public[T.True]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci]', u'has_union', u'two_year', u'public[T.True]:two_year'], dtype='object')
In [333]:
results = smf.ols(
    "log(payaverage) ~ "\
    "C(has_advanced_degree, Treatment(reference='None'))*C(division, Treatment(reference='Gen/LibA'))"\
    "+ has_union + C(college_class) + any_health_ins"
    "+ course_syllabus"
    , data=data).fit()
results.summary()
Out[333]:
OLS Regression Results
Dep. Variable: log(payaverage) R-squared: 0.242
Model: OLS Adj. R-squared: 0.237
Method: Least Squares F-statistic: 48.38
Date: Wed, 30 Apr 2014 Prob (F-statistic): 1.30e-173
Time: 17:39:15 Log-Likelihood: -1506.9
No. Observations: 3197 AIC: 3058.
Df Residuals: 3175 BIC: 3191.
Df Model: 21
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 7.4011 0.208 35.666 0.000 6.994 7.808
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS] 0.1916 0.110 1.737 0.083 -0.025 0.408
C(has_advanced_degree, Treatment(reference='None'))[T.PhD] 0.3947 0.115 3.422 0.001 0.169 0.621
C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] 0.2930 0.117 2.505 0.012 0.064 0.522
C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] 0.4776 0.119 4.021 0.000 0.245 0.710
C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] 0.3658 0.129 2.830 0.005 0.112 0.619
C(division, Treatment(reference='Gen/LibA'))[T.SocSci] 0.4481 0.161 2.782 0.005 0.132 0.764
C(college_class)[T.2-year private not-for-profit] -0.2048 0.222 -0.923 0.356 -0.640 0.230
C(college_class)[T.2-year public] -0.1448 0.139 -1.044 0.296 -0.417 0.127
C(college_class)[T.4-year private not-for-profit] 0.1383 0.138 0.999 0.318 -0.133 0.410
C(college_class)[T.4-year public] 0.1093 0.139 0.788 0.431 -0.162 0.381
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] -0.2057 0.123 -1.670 0.095 -0.447 0.036
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] -0.2917 0.128 -2.274 0.023 -0.543 -0.040
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] -0.3719 0.126 -2.950 0.003 -0.619 -0.125
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] -0.4369 0.132 -3.308 0.001 -0.696 -0.178
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] -0.2224 0.137 -1.618 0.106 -0.492 0.047
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] -0.2467 0.142 -1.739 0.082 -0.525 0.031
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] -0.3961 0.167 -2.371 0.018 -0.724 -0.069
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] -0.4560 0.171 -2.673 0.008 -0.791 -0.121
has_union -0.0844 0.113 -0.748 0.455 -0.306 0.137
any_health_ins 0.2665 0.017 15.441 0.000 0.233 0.300
course_syllabus 0.0973 0.008 11.924 0.000 0.081 0.113
Omnibus: 143.220 Durbin-Watson: 1.832
Prob(Omnibus): 0.000 Jarque-Bera (JB): 411.572
Skew: -0.173 Prob(JB): 4.25e-90
Kurtosis: 4.723 Cond. No. 172.
In [330]:
res3 = smf.ols(
"log(payaverage) ~ college_carnegie", data=data).fit()
res3.summary()
Out[330]:
OLS Regression Results
Dep. Variable: log(payaverage) R-squared: 0.294
Model: OLS Adj. R-squared: 0.288
Method: Least Squares F-statistic: 50.80
Date: Wed, 30 Apr 2014 Prob (F-statistic): 5.26e-283
Time: 17:35:54 Log-Likelihood: -1889.6
No. Observations: 4182 AIC: 3849.
Df Residuals: 4147 BIC: 4071.
Df Model: 34
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 7.6848 0.382 20.129 0.000 6.936 8.433
college_carnegie[T.Associate's--Public Rural-serving Medium] 0.1854 0.418 0.443 0.658 -0.635 1.005
college_carnegie[T.Associate's--Public Suburban-serving Multicampus_] 0.1166 0.399 0.292 0.770 -0.665 0.898
college_carnegie[T.Associate's--Public Urban-serving Multicampus] -0.1892 0.540 -0.350 0.726 -1.248 0.869
college_carnegie[T.Associates--Private Not-for-profit] -0.2084 0.399 -0.523 0.601 -0.990 0.573
college_carnegie[T.Associates--Private Not-for-profit 4-year Primarily Associates] -0.0931 0.412 -0.226 0.821 -0.902 0.715
college_carnegie[T.Associates--Public 2-year colleges under 4-year universities] 0.2094 0.396 0.529 0.597 -0.567 0.986
college_carnegie[T.Associates--Public 4-year Primarily Associates] -0.0373 0.386 -0.097 0.923 -0.793 0.719
college_carnegie[T.Associates--Public Rural-serving Large] -0.0528 0.383 -0.138 0.890 -0.803 0.697
college_carnegie[T.Associates--Public Rural-serving Medium] -0.1522 0.383 -0.398 0.691 -0.903 0.598
college_carnegie[T.Associates--Public Rural-serving Small] -0.1306 0.394 -0.331 0.741 -0.904 0.642
college_carnegie[T.Associates--Public Special Use] -0.4752 0.408 -1.164 0.244 -1.275 0.325
college_carnegie[T.Associates--Public Suburban-serving Multicampus] 0.1881 0.383 0.491 0.623 -0.563 0.939
college_carnegie[T.Associates--Public Suburban-serving Single Campus] 0.0920 0.383 0.240 0.810 -0.659 0.843
college_carnegie[T.Associates--Public Urban-serving Multicampus] 0.0513 0.383 0.134 0.893 -0.699 0.801
college_carnegie[T.Associates--Public Urban-serving Single Campus] 0.0110 0.384 0.029 0.977 -0.741 0.763
college_carnegie[T.Baccalaureate Colleges--Arts and Sciences] 0.3979 0.383 1.039 0.299 -0.353 1.148
college_carnegie[T.Baccalaureate Colleges--Diverse Fields] 0.0938 0.383 0.245 0.806 -0.657 0.844
college_carnegie[T.Baccalaureate and Associates Colleges] -0.1336 0.387 -0.346 0.730 -0.891 0.624
college_carnegie[T.Doctoral and Research Universities] 0.3410 0.383 0.891 0.373 -0.409 1.091
college_carnegie[T.Master's Colleges and Universities (medium programs)] 0.3166 0.540 0.586 0.558 -0.742 1.375
college_carnegie[T.Master's Colleges and Universities--larger programs] -0.1568 0.418 -0.375 0.708 -0.977 0.663
college_carnegie[T.Master's Colleges and Universities--medium programs] 0.4761 0.394 1.207 0.227 -0.297 1.249
college_carnegie[T.Masters Colleges and Universities--larger programs] 0.2871 0.382 0.752 0.452 -0.462 1.036
college_carnegie[T.Masters Colleges and Universities--medium programs] 0.2151 0.383 0.562 0.574 -0.535 0.965
college_carnegie[T.Masters Colleges and Universities--smaller programs] 0.1599 0.384 0.417 0.677 -0.592 0.912
college_carnegie[T.Other health professions schools] 0.3096 0.394 0.787 0.431 -0.462 1.081
college_carnegie[T.Other technology-related schools] -0.5263 0.441 -1.194 0.233 -1.391 0.338
college_carnegie[T.Research Universities--high research activity] 0.4150 0.382 1.086 0.278 -0.335 1.165
college_carnegie[T.Research Universities--very high research activity] 0.7037 0.382 1.842 0.066 -0.045 1.453
college_carnegie[T.Schools of art- music- and design] 0.5276 0.385 1.372 0.170 -0.226 1.281
college_carnegie[T.Schools of business and management] -0.3893 0.399 -0.976 0.329 -1.171 0.392
college_carnegie[T.Schools of engineering] 0.3219 0.441 0.730 0.465 -0.542 1.186
college_carnegie[T.Theological seminaries- Bible colleges- and other faith-related institutions] -0.0367 0.400 -0.092 0.927 -0.822 0.748
college_carnegie[T.Tribal Colleges] -0.2327 0.468 -0.498 0.619 -1.149 0.684
Omnibus: 150.798 Durbin-Watson: 1.851
Prob(Omnibus): 0.000 Jarque-Bera (JB): 440.628
Skew: -0.017 Prob(JB): 2.08e-96
Kurtosis: 4.590 Cond. No. 402.
In [331]:
data.college_carnegie.value_counts()
Out[331]:
Masters Colleges and Universities--larger programs              866
Research Universities--very high research activity              612
Research Universities--high research activity                   364
Associates--Public Urban-serving Multicampus                    252
Doctoral and Research Universities                              232
Associates--Public Rural-serving Large                          229
Masters Colleges and Universities--medium programs              226
Baccalaureate Colleges--Diverse Fields                          199
Baccalaureate Colleges--Arts and Sciences                       187
Associates--Public Rural-serving Medium                         176
Associates--Public Suburban-serving Multicampus                 170
Associates--Public Suburban-serving Single Campus               166
Masters Colleges and Universities--smaller programs             104
Associates--Public Urban-serving Single Campus                  104
Schools of art- music- and design                                69
Associates--Public 4-year Primarily Associates                   50
Baccalaureate and Associates Colleges                            40
Other health professions schools                                 16
Associates--Public Rural-serving Small                           15
Master's Colleges and Universities--medium programs              15
Associates--Public 2-year colleges under 4-year universities     13
Associate's--Public Suburban-serving Multicampus_                11
Schools of business and management                               11
Associates--Private Not-for-profit                               11
Theological seminaries- Bible colleges- and other faith-related institutions     10
Associates--Public Special Use                                    7
Associates--Private Not-for-profit 4-year Primarily Associates      6
Master's Colleges and Universities--larger programs               5
Associate's--Public Rural-serving Medium                          5
Other technology-related schools                                  3
Schools of engineering                                            3
Tribal Colleges                                                   2
Master's Colleges and Universities (medium programs)              1
Associate's--Public Urban-serving Multicampus                     1
Associate's--Public Rural-serving Large                           1
dtype: int64
In [345]:
pd.pivot_table(data, rows='union_name', aggfunc={'has_union': 'count'})
Out[345]:
has_union
union_name
AAUP 92
AFSCME 13
AFT 177
NEA 82
None 2783
Other 309
SEIU 58

7 rows × 1 columns

In [348]:
ipeds = pd.read_csv('data/ipeds2012/CSV_512014-1015/CSV_512014-1015.csv')
In [364]:
ipeds.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 7330 entries, 0 to 7329
Data columns (total 161 columns):
unitid                                                                                                                                                     int64
institution name                                                                                                                                           object
year                                                                                                                                                       int64
DRVIC2012.Percent admitted - total                                                                                                                         float64
DRVIC2012.Admissions yield - total                                                                                                                         float64
DRVIC2012.Tuition and fees, 2009-10                                                                                                                        float64
DRVIC2012.Tuition and fees, 2010-11                                                                                                                        float64
DRVIC2012.Tuition and fees, 2011-12                                                                                                                        float64
DRVIC2012.Tuition and fees, 2012-13                                                                                                                        float64
HD2012.Institution size category                                                                                                                           object
HD2012.Geographic region                                                                                                                                   object
HD2012.Sector of institution                                                                                                                               object
HD2012.Level of institution                                                                                                                                object
HD2012.Control of institution                                                                                                                              object
HD2012.Degree-granting status                                                                                                                              object
HD2012.Historically Black College or University                                                                                                            object
HD2012.Degree of urbanization (Urban-centric locale)                                                                                                       object
HD2012.Institutional category                                                                                                                              object
DRVEF2012.Undergraduate enrollment                                                                                                                         float64
DRVEF2012.Graduate enrollment                                                                                                                              float64
DRVEF2012.Full-time undergraduate enrollment                                                                                                               float64
DRVEF2012.Part-time undergraduate enrollment                                                                                                               float64
DRVEF2012.Percent of total enrollment that are American Indian or Alaska Native                                                                            float64
DRVEF2012.Percent of total enrollment that are Asian                                                                                                       float64
DRVEF2012.Percent of total enrollment that are Black or African American                                                                                   float64
DRVEF2012.Percent of total enrollment that are Hispanic/Latino                                                                                             float64
DRVEF2012.Percent of total enrollment that are Native Hawaiian or Other Pacific Islander                                                                   float64
DRVEF2012.Percent of total enrollment that are White                                                                                                       float64
DRVEF2012.Percent of total enrollment that are two or more races                                                                                           float64
DRVEF2012.Percent of total enrollment that are Race/ethnicity unknown                                                                                      float64
DRVEF2012.Percent of total enrollment that are Nonresident Alien                                                                                           float64
DRVEF2012.Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander                                                                      float64
DRVEF2012.Percent of total enrollment that are women                                                                                                       float64
EF2012D.Student-to-faculty ratio                                                                                                                           float64
EF2012D.Full-time retention rate, 2012                                                                                                                     float64
EF2012D.Part-time retention rate, 2012                                                                                                                     float64
DRVEF2012.Percent of undergraduate enrollment 18-24                                                                                                        float64
DRVEF2012.Percent of undergraduate enrollment, 25-64                                                                                                       float64
DRVEF2012.Percent of undergraduate enrollment over 65                                                                                                      float64
DRVEFDE2012.Percent of students enrolled exclusively in distance education courses                                                                         float64
DRVEFDE2012.Percent of students enrolled in some but not all distance education courses                                                                    float64
DRVEFDE2012.Percent of students not enrolled in any distance education courses                                                                             float64
DRVGR2012.Graduation rate, total cohort                                                                                                                    float64
DRVGR2012.Transfer-out rate, total cohort                                                                                                                  float64
GR200_12.Graduation rate - degree/certificate within 100% of normal time                                                                                   float64
DRVGR2012.Graduation rate - bachelor's degree within 4 years, total                                                                                        float64
SFA1112.Percent of full-time first-time undergraduates receiving any financial aid                                                                         float64
SFA1112.Percent of full-time first-time undergraduates receiving federal, state, local or institutional grant aid                                          float64
SFA1112.Average amount of federal, state, local or institutional grant aid received                                                                        float64
DRVF2012.Core revenues, total dollars (GASB)                                                                                                               float64
DRVF2012.Tuition and fees as a percent of core revenues (GASB)                                                                                             float64
DRVF2012.State appropriations as percent of core revenues  (GASB)                                                                                          float64
DRVF2012.Local appropriations as a percent of core revenues (GASB)                                                                                         float64
DRVF2012.Government grants and contracts as a percent of core revenues (GASB)                                                                              float64
DRVF2012.Private gifts, grants, and contracts as a percent of core revenues (GASB)                                                                         float64
DRVF2012.Investment return as a percent of core revenues (GASB)                                                                                            float64
DRVF2012.Other revenues as a percent of core revenues (GASB)                                                                                               float64
DRVF2012.Core revenues, total dollars (FASB)                                                                                                               float64
DRVF2012.Tuition and fees as a percent of core revenues (FASB)                                                                                             float64
DRVF2012.Government grants and contracts as a percent of core revenues (FASB)                                                                              float64
DRVF2012.Private gifts, grants, contracts/contributions from affiliated entities as a percent of core revenues (FASB)                                      float64
DRVF2012.Investment return as a percent of core revenues (FASB)                                                                                            float64
DRVF2012.Other revenues as a percent of core revenues (FASB)                                                                                               float64
DRVF2012.Core revenues, total dollars (for-profit institutions)                                                                                            float64
DRVF2012.Tuition and fees as a percent of core revenues (for-profit institutions)                                                                          float64
DRVF2012.Govenment appropriations, grants, and contracts as a percent of core revenues (for-profit institutions)                                           float64
DRVF2012.Sales and services of educational activities as a percent of core revenues (for-profit institutions)                                              float64
DRVF2012.Other revenues as a percent of core revenues (for-profit institutions)                                                                            float64
DRVF2012.Revenues from tuition and fees per FTE (GASB)                                                                                                     float64
DRVF2012.Revenues from tuition and fees per FTE (FASB)                                                                                                     float64
DRVF2012.Core expenses, total dollars (GASB)                                                                                                               float64
DRVF2012.Instruction expenses as a percent of total core expenses (GASB)                                                                                   float64
DRVF2012.Research expenses as a percent of total core expenses (GASB)                                                                                      float64
DRVF2012.Public service expenses as a percent of total core expenses (GASB)                                                                                float64
DRVF2012.Academic support expenses as a percent of total core expenses (GASB)                                                                              float64
DRVF2012.Student service expenses as a percent of total core expenses (GASB)                                                                               float64
DRVF2012.Institutional support expenses as a percent of total core expenses (GASB)                                                                         float64
DRVF2012.Other core expenses as a percent of total core expenses (GASB)                                                                                    float64
DRVF2012.Core expenses, total dollars (FASB)                                                                                                               float64
DRVF2012.Instruction expenses as a percent of total core expenses (FASB)                                                                                   float64
DRVF2012.Research expenses as a percent of total core expenses (FASB)                                                                                      float64
DRVF2012.Public service expenses as a percent of total core expenses (FASB)                                                                                float64
DRVF2012.Academic support expenses as a percent of total core expenses (FASB)                                                                              float64
DRVF2012.Student service expenses as a percent of total core expenses (FASB)                                                                               float64
DRVF2012.Institutional support expenses as a percent of total core expenses (FASB)                                                                         float64
DRVF2012.Other core expenses as a percent of total core expenses (FASB)                                                                                    float64
DRVF2012.Core expenses, total dollars (for-profit institutons)                                                                                             float64
DRVF2012.Instruction expenses as a percent of total core expenses (for-profit institutions)                                                                float64
DRVF2012.Academic and institutional support, and student service expenses as a percent of total core expenses (for-profit institutions)                    float64
DRVF2012.Other core expenses as a percent of total  core expenses (for-profit institutions)                                                                float64
DRVF2012.Instruction expenses per FTE  (GASB)                                                                                                              float64
DRVF2012.Research expenses per FTE  (GASB)                                                                                                                 float64
DRVF2012.Public service expenses per FTE (GASB)                                                                                                            float64
DRVF2012.Academic support expenses per FTE (GASB)                                                                                                          float64
DRVF2012.Student service expenses per FTE (GASB)                                                                                                           float64
DRVF2012.Institutional support expenses per FTE (GASB)                                                                                                     float64
DRVF2012.All other core expenses per FTE (GASB)                                                                                                            float64
DRVF2012.Instruction expenses per FTE  (FASB)                                                                                                              float64
DRVF2012.Research expenses per FTE (FASB)                                                                                                                  float64
DRVF2012.Public service expenses per FTE (FASB)                                                                                                            float64
DRVF2012.Academic support expenses per FTE (FASB)                                                                                                          float64
DRVF2012.Student service expenses per FTE (FASB)                                                                                                           float64
DRVF2012.Institutional support expenses per FTE (FASB)                                                                                                     float64
DRVF2012.All other core expenses per FTE (FASB)                                                                                                            float64
DRVF2012.Instruction expenses per FTE (for-profit institutions)                                                                                            float64
DRVF2012.Academic and institutional support, and student services  expense per FTE (for-profit institutions)                                               float64
DRVF2012.All other core expenses per FTE (for-profit institutions)                                                                                         float64
DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (GASB)                                                float64
DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (GASB)                                       float64
DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (GASB)                                             float64
DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (GASB)                                 float64
DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (GASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (GASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (GASB)                   float64
DRVF2012.Salaries, wages, and benefit expenses for other core expense functions  as a percent of total expenses for other core expense functions (GASB)    float64
DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (GASB)                                                                 float64
DRVF2012.Total salaries and wage expenses as a percent of total expenses (GASB)                                                                            float64
DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (FASB)                                                float64
DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (FASB)                                       float64
DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (FASB)                                             float64
DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (FASB)                                 float64
DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (FASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (FASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (FASB)                   float64
DRVF2012.Salaries, wages, and benefit expenses for other core expense functions  as a percent of total expenses for other core expense functions (FASB)    float64
DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (FASB)                                                                 float64
DRVF2012.Total salaries and wage expenses as a percent of total expenses (FASB)                                                                            float64
DRVF2012.Endowment assets (year end) per FTE enrollment (GASB)                                                                                             float64
DRVF2012.Endowment assets (year end) per FTE enrollment (FASB)                                                                                             float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - all ranks                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time insructional staff - professors                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - associate professors                                                       float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - assistant professors                                                       float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - instructors                                                                float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - lecturers                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - No academic rank                                                           float64
DRVHR2012.Total FTE staff                                                                                                                                  int64
DRVHR2012.Postsecondary Teachers FTE staff                                                                                                                 int64
DRVHR2012.Postsecondary Teachers Instructional FTE                                                                                                         int64
DRVHR2012.Postsecondary Teachers Research FTE                                                                                                              int64
DRVHR2012.Postsecondary Teachers Public Service FTE                                                                                                        int64
DRVHR2012.Librarians, Curators, and Archivists and other teaching and Instructional support occupations                                                    int64
DRVHR2012.Librarians, Curators, and Archivists FTE                                                                                                         int64
DRVHR2012.Other teaching and Instructional Support FTE                                                                                                     int64
DRVHR2012.Management FTE                                                                                                                                   int64
DRVHR2012.Business and Financial Operations FTE                                                                                                            int64
DRVHR2012.Computer, Engineering, and Science FTE                                                                                                           int64
DRVHR2012.Community Service, Legal, Arts, and Media FTE                                                                                                    int64
DRVHR2012.Healthcare FTE                                                                                                                                   int64
DRVHR2012.Service, sales, office/admin support, natural resources, construction, maintenance, production, transportation & materials moving FTE            int64
DRVHR2012.Service FTE                                                                                                                                      int64
DRVHR2012.Sales and Related FTE                                                                                                                            int64
DRVHR2012.Office and Administrative Support FTE                                                                                                            int64
DRVHR2012.Natural Resources, Construction, and Maintenance FTE                                                                                             int64
DRVHR2012.Production, Transportation, and Material Moving FTE                                                                                              int64
SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12.1                                                                           float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11.1                                                                           float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10.1                                                                           float64
dtypes: float64(130), int64(21), object(10)
In [363]:
ipeds[ipeds['institution name'].isin(data.college_name.unique())].info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1078 entries, 1 to 6453
Data columns (total 161 columns):
unitid                                                                                                                                                     int64
institution name                                                                                                                                           object
year                                                                                                                                                       int64
DRVIC2012.Percent admitted - total                                                                                                                         float64
DRVIC2012.Admissions yield - total                                                                                                                         float64
DRVIC2012.Tuition and fees, 2009-10                                                                                                                        float64
DRVIC2012.Tuition and fees, 2010-11                                                                                                                        float64
DRVIC2012.Tuition and fees, 2011-12                                                                                                                        float64
DRVIC2012.Tuition and fees, 2012-13                                                                                                                        float64
HD2012.Institution size category                                                                                                                           object
HD2012.Geographic region                                                                                                                                   object
HD2012.Sector of institution                                                                                                                               object
HD2012.Level of institution                                                                                                                                object
HD2012.Control of institution                                                                                                                              object
HD2012.Degree-granting status                                                                                                                              object
HD2012.Historically Black College or University                                                                                                            object
HD2012.Degree of urbanization (Urban-centric locale)                                                                                                       object
HD2012.Institutional category                                                                                                                              object
DRVEF2012.Undergraduate enrollment                                                                                                                         float64
DRVEF2012.Graduate enrollment                                                                                                                              float64
DRVEF2012.Full-time undergraduate enrollment                                                                                                               float64
DRVEF2012.Part-time undergraduate enrollment                                                                                                               float64
DRVEF2012.Percent of total enrollment that are American Indian or Alaska Native                                                                            float64
DRVEF2012.Percent of total enrollment that are Asian                                                                                                       float64
DRVEF2012.Percent of total enrollment that are Black or African American                                                                                   float64
DRVEF2012.Percent of total enrollment that are Hispanic/Latino                                                                                             float64
DRVEF2012.Percent of total enrollment that are Native Hawaiian or Other Pacific Islander                                                                   float64
DRVEF2012.Percent of total enrollment that are White                                                                                                       float64
DRVEF2012.Percent of total enrollment that are two or more races                                                                                           float64
DRVEF2012.Percent of total enrollment that are Race/ethnicity unknown                                                                                      float64
DRVEF2012.Percent of total enrollment that are Nonresident Alien                                                                                           float64
DRVEF2012.Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander                                                                      float64
DRVEF2012.Percent of total enrollment that are women                                                                                                       float64
EF2012D.Student-to-faculty ratio                                                                                                                           float64
EF2012D.Full-time retention rate, 2012                                                                                                                     float64
EF2012D.Part-time retention rate, 2012                                                                                                                     float64
DRVEF2012.Percent of undergraduate enrollment 18-24                                                                                                        float64
DRVEF2012.Percent of undergraduate enrollment, 25-64                                                                                                       float64
DRVEF2012.Percent of undergraduate enrollment over 65                                                                                                      float64
DRVEFDE2012.Percent of students enrolled exclusively in distance education courses                                                                         float64
DRVEFDE2012.Percent of students enrolled in some but not all distance education courses                                                                    float64
DRVEFDE2012.Percent of students not enrolled in any distance education courses                                                                             float64
DRVGR2012.Graduation rate, total cohort                                                                                                                    float64
DRVGR2012.Transfer-out rate, total cohort                                                                                                                  float64
GR200_12.Graduation rate - degree/certificate within 100% of normal time                                                                                   float64
DRVGR2012.Graduation rate - bachelor's degree within 4 years, total                                                                                        float64
SFA1112.Percent of full-time first-time undergraduates receiving any financial aid                                                                         float64
SFA1112.Percent of full-time first-time undergraduates receiving federal, state, local or institutional grant aid                                          float64
SFA1112.Average amount of federal, state, local or institutional grant aid received                                                                        float64
DRVF2012.Core revenues, total dollars (GASB)                                                                                                               float64
DRVF2012.Tuition and fees as a percent of core revenues (GASB)                                                                                             float64
DRVF2012.State appropriations as percent of core revenues  (GASB)                                                                                          float64
DRVF2012.Local appropriations as a percent of core revenues (GASB)                                                                                         float64
DRVF2012.Government grants and contracts as a percent of core revenues (GASB)                                                                              float64
DRVF2012.Private gifts, grants, and contracts as a percent of core revenues (GASB)                                                                         float64
DRVF2012.Investment return as a percent of core revenues (GASB)                                                                                            float64
DRVF2012.Other revenues as a percent of core revenues (GASB)                                                                                               float64
DRVF2012.Core revenues, total dollars (FASB)                                                                                                               float64
DRVF2012.Tuition and fees as a percent of core revenues (FASB)                                                                                             float64
DRVF2012.Government grants and contracts as a percent of core revenues (FASB)                                                                              float64
DRVF2012.Private gifts, grants, contracts/contributions from affiliated entities as a percent of core revenues (FASB)                                      float64
DRVF2012.Investment return as a percent of core revenues (FASB)                                                                                            float64
DRVF2012.Other revenues as a percent of core revenues (FASB)                                                                                               float64
DRVF2012.Core revenues, total dollars (for-profit institutions)                                                                                            float64
DRVF2012.Tuition and fees as a percent of core revenues (for-profit institutions)                                                                          float64
DRVF2012.Govenment appropriations, grants, and contracts as a percent of core revenues (for-profit institutions)                                           float64
DRVF2012.Sales and services of educational activities as a percent of core revenues (for-profit institutions)                                              float64
DRVF2012.Other revenues as a percent of core revenues (for-profit institutions)                                                                            float64
DRVF2012.Revenues from tuition and fees per FTE (GASB)                                                                                                     float64
DRVF2012.Revenues from tuition and fees per FTE (FASB)                                                                                                     float64
DRVF2012.Core expenses, total dollars (GASB)                                                                                                               float64
DRVF2012.Instruction expenses as a percent of total core expenses (GASB)                                                                                   float64
DRVF2012.Research expenses as a percent of total core expenses (GASB)                                                                                      float64
DRVF2012.Public service expenses as a percent of total core expenses (GASB)                                                                                float64
DRVF2012.Academic support expenses as a percent of total core expenses (GASB)                                                                              float64
DRVF2012.Student service expenses as a percent of total core expenses (GASB)                                                                               float64
DRVF2012.Institutional support expenses as a percent of total core expenses (GASB)                                                                         float64
DRVF2012.Other core expenses as a percent of total core expenses (GASB)                                                                                    float64
DRVF2012.Core expenses, total dollars (FASB)                                                                                                               float64
DRVF2012.Instruction expenses as a percent of total core expenses (FASB)                                                                                   float64
DRVF2012.Research expenses as a percent of total core expenses (FASB)                                                                                      float64
DRVF2012.Public service expenses as a percent of total core expenses (FASB)                                                                                float64
DRVF2012.Academic support expenses as a percent of total core expenses (FASB)                                                                              float64
DRVF2012.Student service expenses as a percent of total core expenses (FASB)                                                                               float64
DRVF2012.Institutional support expenses as a percent of total core expenses (FASB)                                                                         float64
DRVF2012.Other core expenses as a percent of total core expenses (FASB)                                                                                    float64
DRVF2012.Core expenses, total dollars (for-profit institutons)                                                                                             float64
DRVF2012.Instruction expenses as a percent of total core expenses (for-profit institutions)                                                                float64
DRVF2012.Academic and institutional support, and student service expenses as a percent of total core expenses (for-profit institutions)                    float64
DRVF2012.Other core expenses as a percent of total  core expenses (for-profit institutions)                                                                float64
DRVF2012.Instruction expenses per FTE  (GASB)                                                                                                              float64
DRVF2012.Research expenses per FTE  (GASB)                                                                                                                 float64
DRVF2012.Public service expenses per FTE (GASB)                                                                                                            float64
DRVF2012.Academic support expenses per FTE (GASB)                                                                                                          float64
DRVF2012.Student service expenses per FTE (GASB)                                                                                                           float64
DRVF2012.Institutional support expenses per FTE (GASB)                                                                                                     float64
DRVF2012.All other core expenses per FTE (GASB)                                                                                                            float64
DRVF2012.Instruction expenses per FTE  (FASB)                                                                                                              float64
DRVF2012.Research expenses per FTE (FASB)                                                                                                                  float64
DRVF2012.Public service expenses per FTE (FASB)                                                                                                            float64
DRVF2012.Academic support expenses per FTE (FASB)                                                                                                          float64
DRVF2012.Student service expenses per FTE (FASB)                                                                                                           float64
DRVF2012.Institutional support expenses per FTE (FASB)                                                                                                     float64
DRVF2012.All other core expenses per FTE (FASB)                                                                                                            float64
DRVF2012.Instruction expenses per FTE (for-profit institutions)                                                                                            float64
DRVF2012.Academic and institutional support, and student services  expense per FTE (for-profit institutions)                                               float64
DRVF2012.All other core expenses per FTE (for-profit institutions)                                                                                         float64
DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (GASB)                                                float64
DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (GASB)                                       float64
DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (GASB)                                             float64
DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (GASB)                                 float64
DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (GASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (GASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (GASB)                   float64
DRVF2012.Salaries, wages, and benefit expenses for other core expense functions  as a percent of total expenses for other core expense functions (GASB)    float64
DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (GASB)                                                                 float64
DRVF2012.Total salaries and wage expenses as a percent of total expenses (GASB)                                                                            float64
DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (FASB)                                                float64
DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (FASB)                                       float64
DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (FASB)                                             float64
DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (FASB)                                 float64
DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (FASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (FASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (FASB)                   float64
DRVF2012.Salaries, wages, and benefit expenses for other core expense functions  as a percent of total expenses for other core expense functions (FASB)    float64
DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (FASB)                                                                 float64
DRVF2012.Total salaries and wage expenses as a percent of total expenses (FASB)                                                                            float64
DRVF2012.Endowment assets (year end) per FTE enrollment (GASB)                                                                                             float64
DRVF2012.Endowment assets (year end) per FTE enrollment (FASB)                                                                                             float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - all ranks                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time insructional staff - professors                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - associate professors                                                       float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - assistant professors                                                       float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - instructors                                                                float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - lecturers                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - No academic rank                                                           float64
DRVHR2012.Total FTE staff                                                                                                                                  int64
DRVHR2012.Postsecondary Teachers FTE staff                                                                                                                 int64
DRVHR2012.Postsecondary Teachers Instructional FTE                                                                                                         int64
DRVHR2012.Postsecondary Teachers Research FTE                                                                                                              int64
DRVHR2012.Postsecondary Teachers Public Service FTE                                                                                                        int64
DRVHR2012.Librarians, Curators, and Archivists and other teaching and Instructional support occupations                                                    int64
DRVHR2012.Librarians, Curators, and Archivists FTE                                                                                                         int64
DRVHR2012.Other teaching and Instructional Support FTE                                                                                                     int64
DRVHR2012.Management FTE                                                                                                                                   int64
DRVHR2012.Business and Financial Operations FTE                                                                                                            int64
DRVHR2012.Computer, Engineering, and Science FTE                                                                                                           int64
DRVHR2012.Community Service, Legal, Arts, and Media FTE                                                                                                    int64
DRVHR2012.Healthcare FTE                                                                                                                                   int64
DRVHR2012.Service, sales, office/admin support, natural resources, construction, maintenance, production, transportation & materials moving FTE            int64
DRVHR2012.Service FTE                                                                                                                                      int64
DRVHR2012.Sales and Related FTE                                                                                                                            int64
DRVHR2012.Office and Administrative Support FTE                                                                                                            int64
DRVHR2012.Natural Resources, Construction, and Maintenance FTE                                                                                             int64
DRVHR2012.Production, Transportation, and Material Moving FTE                                                                                              int64
SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12.1                                                                           float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11.1                                                                           float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10.1                                                                           float64
dtypes: float64(130), int64(21), object(10)
In [359]:
pd.merge(data[['college_name']], ipeds, left_index=True, right_on='institution name').info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns (total 162 columns):
college_name                                                                                                                                               object
unitid                                                                                                                                                     int64
institution name                                                                                                                                           object
year                                                                                                                                                       int64
DRVIC2012.Percent admitted - total                                                                                                                         float64
DRVIC2012.Admissions yield - total                                                                                                                         float64
DRVIC2012.Tuition and fees, 2009-10                                                                                                                        float64
DRVIC2012.Tuition and fees, 2010-11                                                                                                                        float64
DRVIC2012.Tuition and fees, 2011-12                                                                                                                        float64
DRVIC2012.Tuition and fees, 2012-13                                                                                                                        float64
HD2012.Institution size category                                                                                                                           object
HD2012.Geographic region                                                                                                                                   object
HD2012.Sector of institution                                                                                                                               object
HD2012.Level of institution                                                                                                                                object
HD2012.Control of institution                                                                                                                              object
HD2012.Degree-granting status                                                                                                                              object
HD2012.Historically Black College or University                                                                                                            object
HD2012.Degree of urbanization (Urban-centric locale)                                                                                                       object
HD2012.Institutional category                                                                                                                              object
DRVEF2012.Undergraduate enrollment                                                                                                                         float64
DRVEF2012.Graduate enrollment                                                                                                                              float64
DRVEF2012.Full-time undergraduate enrollment                                                                                                               float64
DRVEF2012.Part-time undergraduate enrollment                                                                                                               float64
DRVEF2012.Percent of total enrollment that are American Indian or Alaska Native                                                                            float64
DRVEF2012.Percent of total enrollment that are Asian                                                                                                       float64
DRVEF2012.Percent of total enrollment that are Black or African American                                                                                   float64
DRVEF2012.Percent of total enrollment that are Hispanic/Latino                                                                                             float64
DRVEF2012.Percent of total enrollment that are Native Hawaiian or Other Pacific Islander                                                                   float64
DRVEF2012.Percent of total enrollment that are White                                                                                                       float64
DRVEF2012.Percent of total enrollment that are two or more races                                                                                           float64
DRVEF2012.Percent of total enrollment that are Race/ethnicity unknown                                                                                      float64
DRVEF2012.Percent of total enrollment that are Nonresident Alien                                                                                           float64
DRVEF2012.Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander                                                                      float64
DRVEF2012.Percent of total enrollment that are women                                                                                                       float64
EF2012D.Student-to-faculty ratio                                                                                                                           float64
EF2012D.Full-time retention rate, 2012                                                                                                                     float64
EF2012D.Part-time retention rate, 2012                                                                                                                     float64
DRVEF2012.Percent of undergraduate enrollment 18-24                                                                                                        float64
DRVEF2012.Percent of undergraduate enrollment, 25-64                                                                                                       float64
DRVEF2012.Percent of undergraduate enrollment over 65                                                                                                      float64
DRVEFDE2012.Percent of students enrolled exclusively in distance education courses                                                                         float64
DRVEFDE2012.Percent of students enrolled in some but not all distance education courses                                                                    float64
DRVEFDE2012.Percent of students not enrolled in any distance education courses                                                                             float64
DRVGR2012.Graduation rate, total cohort                                                                                                                    float64
DRVGR2012.Transfer-out rate, total cohort                                                                                                                  float64
GR200_12.Graduation rate - degree/certificate within 100% of normal time                                                                                   float64
DRVGR2012.Graduation rate - bachelor's degree within 4 years, total                                                                                        float64
SFA1112.Percent of full-time first-time undergraduates receiving any financial aid                                                                         float64
SFA1112.Percent of full-time first-time undergraduates receiving federal, state, local or institutional grant aid                                          float64
SFA1112.Average amount of federal, state, local or institutional grant aid received                                                                        float64
DRVF2012.Core revenues, total dollars (GASB)                                                                                                               float64
DRVF2012.Tuition and fees as a percent of core revenues (GASB)                                                                                             float64
DRVF2012.State appropriations as percent of core revenues  (GASB)                                                                                          float64
DRVF2012.Local appropriations as a percent of core revenues (GASB)                                                                                         float64
DRVF2012.Government grants and contracts as a percent of core revenues (GASB)                                                                              float64
DRVF2012.Private gifts, grants, and contracts as a percent of core revenues (GASB)                                                                         float64
DRVF2012.Investment return as a percent of core revenues (GASB)                                                                                            float64
DRVF2012.Other revenues as a percent of core revenues (GASB)                                                                                               float64
DRVF2012.Core revenues, total dollars (FASB)                                                                                                               float64
DRVF2012.Tuition and fees as a percent of core revenues (FASB)                                                                                             float64
DRVF2012.Government grants and contracts as a percent of core revenues (FASB)                                                                              float64
DRVF2012.Private gifts, grants, contracts/contributions from affiliated entities as a percent of core revenues (FASB)                                      float64
DRVF2012.Investment return as a percent of core revenues (FASB)                                                                                            float64
DRVF2012.Other revenues as a percent of core revenues (FASB)                                                                                               float64
DRVF2012.Core revenues, total dollars (for-profit institutions)                                                                                            float64
DRVF2012.Tuition and fees as a percent of core revenues (for-profit institutions)                                                                          float64
DRVF2012.Govenment appropriations, grants, and contracts as a percent of core revenues (for-profit institutions)                                           float64
DRVF2012.Sales and services of educational activities as a percent of core revenues (for-profit institutions)                                              float64
DRVF2012.Other revenues as a percent of core revenues (for-profit institutions)                                                                            float64
DRVF2012.Revenues from tuition and fees per FTE (GASB)                                                                                                     float64
DRVF2012.Revenues from tuition and fees per FTE (FASB)                                                                                                     float64
DRVF2012.Core expenses, total dollars (GASB)                                                                                                               float64
DRVF2012.Instruction expenses as a percent of total core expenses (GASB)                                                                                   float64
DRVF2012.Research expenses as a percent of total core expenses (GASB)                                                                                      float64
DRVF2012.Public service expenses as a percent of total core expenses (GASB)                                                                                float64
DRVF2012.Academic support expenses as a percent of total core expenses (GASB)                                                                              float64
DRVF2012.Student service expenses as a percent of total core expenses (GASB)                                                                               float64
DRVF2012.Institutional support expenses as a percent of total core expenses (GASB)                                                                         float64
DRVF2012.Other core expenses as a percent of total core expenses (GASB)                                                                                    float64
DRVF2012.Core expenses, total dollars (FASB)                                                                                                               float64
DRVF2012.Instruction expenses as a percent of total core expenses (FASB)                                                                                   float64
DRVF2012.Research expenses as a percent of total core expenses (FASB)                                                                                      float64
DRVF2012.Public service expenses as a percent of total core expenses (FASB)                                                                                float64
DRVF2012.Academic support expenses as a percent of total core expenses (FASB)                                                                              float64
DRVF2012.Student service expenses as a percent of total core expenses (FASB)                                                                               float64
DRVF2012.Institutional support expenses as a percent of total core expenses (FASB)                                                                         float64
DRVF2012.Other core expenses as a percent of total core expenses (FASB)                                                                                    float64
DRVF2012.Core expenses, total dollars (for-profit institutons)                                                                                             float64
DRVF2012.Instruction expenses as a percent of total core expenses (for-profit institutions)                                                                float64
DRVF2012.Academic and institutional support, and student service expenses as a percent of total core expenses (for-profit institutions)                    float64
DRVF2012.Other core expenses as a percent of total  core expenses (for-profit institutions)                                                                float64
DRVF2012.Instruction expenses per FTE  (GASB)                                                                                                              float64
DRVF2012.Research expenses per FTE  (GASB)                                                                                                                 float64
DRVF2012.Public service expenses per FTE (GASB)                                                                                                            float64
DRVF2012.Academic support expenses per FTE (GASB)                                                                                                          float64
DRVF2012.Student service expenses per FTE (GASB)                                                                                                           float64
DRVF2012.Institutional support expenses per FTE (GASB)                                                                                                     float64
DRVF2012.All other core expenses per FTE (GASB)                                                                                                            float64
DRVF2012.Instruction expenses per FTE  (FASB)                                                                                                              float64
DRVF2012.Research expenses per FTE (FASB)                                                                                                                  float64
DRVF2012.Public service expenses per FTE (FASB)                                                                                                            float64
DRVF2012.Academic support expenses per FTE (FASB)                                                                                                          float64
DRVF2012.Student service expenses per FTE (FASB)                                                                                                           float64
DRVF2012.Institutional support expenses per FTE (FASB)                                                                                                     float64
DRVF2012.All other core expenses per FTE (FASB)                                                                                                            float64
DRVF2012.Instruction expenses per FTE (for-profit institutions)                                                                                            float64
DRVF2012.Academic and institutional support, and student services  expense per FTE (for-profit institutions)                                               float64
DRVF2012.All other core expenses per FTE (for-profit institutions)                                                                                         float64
DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (GASB)                                                float64
DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (GASB)                                       float64
DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (GASB)                                             float64
DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (GASB)                                 float64
DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (GASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (GASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (GASB)                   float64
DRVF2012.Salaries, wages, and benefit expenses for other core expense functions  as a percent of total expenses for other core expense functions (GASB)    float64
DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (GASB)                                                                 float64
DRVF2012.Total salaries and wage expenses as a percent of total expenses (GASB)                                                                            float64
DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (FASB)                                                float64
DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (FASB)                                       float64
DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (FASB)                                             float64
DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (FASB)                                 float64
DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (FASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (FASB)                             float64
DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (FASB)                   float64
DRVF2012.Salaries, wages, and benefit expenses for other core expense functions  as a percent of total expenses for other core expense functions (FASB)    float64
DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (FASB)                                                                 float64
DRVF2012.Total salaries and wage expenses as a percent of total expenses (FASB)                                                                            float64
DRVF2012.Endowment assets (year end) per FTE enrollment (GASB)                                                                                             float64
DRVF2012.Endowment assets (year end) per FTE enrollment (FASB)                                                                                             float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - all ranks                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time insructional staff - professors                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - associate professors                                                       float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - assistant professors                                                       float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - instructors                                                                float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - lecturers                                                                  float64
DRVHR2012.Average salary equated to 9 months of full-time instructional staff - No academic rank                                                           float64
DRVHR2012.Total FTE staff                                                                                                                                  int64
DRVHR2012.Postsecondary Teachers FTE staff                                                                                                                 int64
DRVHR2012.Postsecondary Teachers Instructional FTE                                                                                                         int64
DRVHR2012.Postsecondary Teachers Research FTE                                                                                                              int64
DRVHR2012.Postsecondary Teachers Public Service FTE                                                                                                        int64
DRVHR2012.Librarians, Curators, and Archivists and other teaching and Instructional support occupations                                                    int64
DRVHR2012.Librarians, Curators, and Archivists FTE                                                                                                         int64
DRVHR2012.Other teaching and Instructional Support FTE                                                                                                     int64
DRVHR2012.Management FTE                                                                                                                                   int64
DRVHR2012.Business and Financial Operations FTE                                                                                                            int64
DRVHR2012.Computer, Engineering, and Science FTE                                                                                                           int64
DRVHR2012.Community Service, Legal, Arts, and Media FTE                                                                                                    int64
DRVHR2012.Healthcare FTE                                                                                                                                   int64
DRVHR2012.Service, sales, office/admin support, natural resources, construction, maintenance, production, transportation & materials moving FTE            int64
DRVHR2012.Service FTE                                                                                                                                      int64
DRVHR2012.Sales and Related FTE                                                                                                                            int64
DRVHR2012.Office and Administrative Support FTE                                                                                                            int64
DRVHR2012.Natural Resources, Construction, and Maintenance FTE                                                                                             int64
DRVHR2012.Production, Transportation, and Material Moving FTE                                                                                              int64
SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10                                                                             float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12.1                                                                           float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11.1                                                                           float64
SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10.1                                                                           float64
dtypes: float64(130), int64(21), object(11)
In [352]:
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4182 entries, 1809 to 6927
Data columns (total 54 columns):
department_id                    4182 non-null int64
payaverage                       4182 non-null int64
paytype                          662 non-null float64
user_supplied_department_name    4177 non-null object
course_type                      0 non-null float64
contract_type                    0 non-null object
period_term                      4121 non-null float64
period_year                      4121 non-null float64
has_contract                     0 non-null float64
has_retirement                   3878 non-null float64
has_health_insurance             3914 non-null float64
has_governance                   3897 non-null float64
has_union                        3533 non-null float64
union_name                       3515 non-null object
is_school_rep                    4182 non-null int64
course_syllabus                  4083 non-null float64
office_space                     4003 non-null float64
student_evals                    3483 non-null float64
has_advanced_degree              4182 non-null object
paytype_credits                  486 non-null float64
paytype_class_size               18 non-null float64
paytype_hours                    168 non-null float64
paytype_weeks                    168 non-null float64
paytype_courses                  103 non-null float64
department_type_id               4182 non-null int64
display_name                     4182 non-null object
department_type_name             4182 non-null object
college_class                    4182 non-null object
college_carnegie                 4182 non-null object
college_unit_id                  4182 non-null int64
college_state                    4182 non-null object
college_id                       4182 non-null int64
college_hbcu                     0 non-null float64
college_flagship                 0 non-null float64
college_name                     4182 non-null object
public                           4182 non-null bool
private                          4182 non-null bool
for-profit                       4182 non-null bool
private not-for-profit           4182 non-null bool
division                         4182 non-null object
two_year                         4182 non-null float64
degree_JD                        4182 non-null float64
degree_MA/MS                     4182 non-null float64
degree_MD                        4182 non-null float64
degree_None                      4182 non-null float64
degree_PhD                       4182 non-null float64
degree_nan                       4182 non-null float64
div_Arts/Hum                     4182 non-null float64
div_Gen/LibA                     4182 non-null float64
div_Other                        4182 non-null float64
div_Prof/ApS                     4182 non-null float64
div_Sci/Tech                     4182 non-null float64
div_SocSci                       4182 non-null float64
any_health_ins                   3663 non-null float64
dtypes: bool(4), float64(33), int64(6), object(11)