import numpy as np
import pandas as pd
from pylab import *
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
df = pd.read_csv('data/adjunct.csv')
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 6929 entries, 0 to 6928 Data columns (total 35 columns): department_id 6929 non-null int64 payaverage 6929 non-null int64 paytype 2593 non-null float64 user_supplied_department_name 6719 non-null object course_type 1471 non-null float64 contract_type 1724 non-null object period_term 4517 non-null float64 period_year 4517 non-null float64 has_contract 0 non-null float64 has_retirement 6224 non-null float64 has_health_insurance 6249 non-null float64 has_governance 6202 non-null float64 has_union 5797 non-null float64 union_name 4348 non-null object is_school_rep 6929 non-null int64 course_syllabus 4503 non-null float64 office_space 4429 non-null float64 student_evals 3857 non-null float64 has_advanced_degree 4532 non-null float64 paytype_credits 566 non-null float64 paytype_class_size 34 non-null float64 paytype_hours 216 non-null float64 paytype_weeks 216 non-null float64 paytype_courses 111 non-null float64 department_type_id 6929 non-null int64 display_name 6929 non-null object department_type_name 6929 non-null object college_class 6929 non-null object college_carnegie 6929 non-null object college_unit_id 6929 non-null int64 college_state 6929 non-null object college_id 6929 non-null int64 college_hbcu 0 non-null float64 college_flagship 0 non-null float64 college_name 6929 non-null object dtypes: float64(20), int64(6), object(9)
Replace had_advanced_degree
with meaningful labels.
degree_map = {0: 'None', 1: 'MA/MS', 2: 'PhD', 3: 'JD', 4: 'MD'}
df.has_advanced_degree = df.has_advanced_degree.replace(degree_map)
axes = df.payaverage.hist(bins=math.sqrt(2000), by=df.has_union, normed=True)
axes[0].set_title("Not Unionized")
axes[1].set_title("Unionized")
<matplotlib.text.Text at 0x10dde8a50>
df.course_type.unique()
array([ 0., 1., nan])
axes = df.payaverage.hist(bins=math.sqrt(2000), by=df.course_type, normed=True)
axes = df.payaverage.hist(bins=math.sqrt(1000), by=df.has_advanced_degree, normed=True)
df.groupby('has_advanced_degree').payaverage.describe().unstack()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
has_advanced_degree | ||||||||
JD | 62 | 3620.854839 | 2647.832339 | 1200 | 2200.00 | 3000.0 | 3787.50 | 18000 |
MA/MS | 2700 | 2796.370370 | 1393.114004 | 300 | 1950.00 | 2500.0 | 3300.00 | 20000 |
MD | 6 | 2946.833333 | 1535.190466 | 1386 | 1735.00 | 2591.5 | 4083.25 | 5082 |
None | 200 | 3145.580000 | 1667.177476 | 930 | 1993.75 | 2725.0 | 3753.75 | 9945 |
PhD | 1564 | 3543.102302 | 1724.795824 | 450 | 2398.75 | 3066.0 | 4481.25 | 15000 |
5 rows × 8 columns
df.boxplot('payaverage', by='has_advanced_degree')
<matplotlib.axes.AxesSubplot at 0x10a1f3810>
ma = df[df.has_advanced_degree=='MA/MS']
dr = df[df.has_advanced_degree=='PhD']
nodeg = df[df.has_advanced_degree=='None']
print(stats.ttest_ind(ma.payaverage, dr.payaverage))
print(stats.ttest_ind(ma.payaverage, nodeg.payaverage))
(array(-15.428027721651176), 2.6855989136275075e-52) (array(-3.3709109788858163), 0.00075901665280692879)
df.boxplot('payaverage', by='college_class', rot=45)
<matplotlib.axes.AxesSubplot at 0x10d32ff10>
df.college_class.unique()
array(['4-year public', '2-year public', '4-year private not-for-profit', '4-year private for-profit', 'Administrative Unit', '2-year private not-for-profit', '2-year private for-profit', '2-year Public'], dtype=object)
df[df.has_advanced_degree.isin(['MA/MS','PhD','None'])].groupby(['college_class', 'has_advanced_degree']).payaverage.describe().unstack()
count | mean | std | min | 25% | 50% | 75% | max | ||
---|---|---|---|---|---|---|---|---|---|
college_class | has_advanced_degree | ||||||||
2-year Public | MA/MS | 9 | 2426.444444 | 479.757780 | 1800 | 1800.00 | 2600.0 | 2762.0 | 2899 |
PhD | 2 | 2755.000000 | 205.060967 | 2610 | 2682.50 | 2755.0 | 2827.5 | 2900 | |
2-year private for-profit | MA/MS | 35 | 1467.371429 | 491.882763 | 500 | 1102.00 | 1500.0 | 1850.0 | 2400 |
None | 2 | 1258.000000 | 223.445743 | 1100 | 1179.00 | 1258.0 | 1337.0 | 1416 | |
PhD | 4 | 1286.250000 | 225.291478 | 1125 | 1181.25 | 1200.0 | 1305.0 | 1620 | |
2-year private not-for-profit | MA/MS | 7 | 1712.142857 | 590.224937 | 1050 | 1392.50 | 1500.0 | 1900.0 | 2850 |
2-year public | MA/MS | 812 | 2369.863300 | 1135.398714 | 450 | 1692.00 | 2116.5 | 2700.0 | 12000 |
None | 68 | 2580.294118 | 1597.194500 | 930 | 1530.00 | 2190.0 | 2994.0 | 9945 | |
PhD | 223 | 2631.869955 | 1287.654973 | 713 | 1848.00 | 2340.0 | 3000.0 | 9920 | |
4-year private for-profit | MA/MS | 142 | 1780.612676 | 733.041501 | 666 | 1300.00 | 1647.5 | 2100.0 | 6000 |
None | 5 | 1620.000000 | 433.517589 | 1200 | 1250.00 | 1525.0 | 1925.0 | 2200 | |
PhD | 75 | 2073.373333 | 767.782887 | 595 | 1587.50 | 2000.0 | 2476.5 | 5620 | |
4-year private not-for-profit | MA/MS | 839 | 3099.891538 | 1514.441550 | 300 | 2100.00 | 2850.0 | 3800.0 | 20000 |
None | 66 | 3700.212121 | 1711.597810 | 1060 | 2500.00 | 3112.5 | 4800.0 | 8000 | |
PhD | 672 | 3812.418155 | 1868.524017 | 450 | 2506.00 | 3300.0 | 4803.0 | 15000 | |
4-year public | MA/MS | 845 | 3152.487574 | 1387.765105 | 600 | 2250.00 | 3000.0 | 3610.0 | 18000 |
None | 59 | 3369.932203 | 1480.968531 | 1000 | 2400.00 | 3300.0 | 3924.5 | 9360 | |
PhD | 580 | 3802.272414 | 1568.430001 | 800 | 2500.00 | 3500.0 | 4825.0 | 12200 | |
Administrative Unit | MA/MS | 11 | 2107.454545 | 459.555299 | 1600 | 1747.50 | 2100.0 | 2265.5 | 3156 |
PhD | 8 | 2635.500000 | 1053.929653 | 1200 | 1800.00 | 2550.0 | 3550.0 | 3984 |
20 rows × 8 columns
df['public'] = df.college_class.str.contains('public|Public')
df['private'] = df.college_class.str.contains('private')
df['for-profit'] = df.college_class.str.contains(' for-profit') # the leading space ensures it doesn't match "not-for-profit"
df['private not-for-profit'] = (df.private & ~df['for-profit'])
df['2-year'] = df.college_class.str.contains('2')
df['2-year'][df.college_class == 'Administrative Unit'] = np.nan
df['2-year'].value_counts()
0 5137 1 1751 dtype: int64
df.department_type_name.value_counts()
English 1114 Business 346 Arts 327 Composition, Rhetoric, Writing 325 History 296 Not Specified 288 Humanities 244 Psychology 241 General Studies 238 Liberal Arts 236 Education 204 Social Sciences 204 Communications 192 Sociology 188 Biology 181 ... Environmental Studies and Forestry 22 Public Administration 22 Area Studies 19 Cultural and Ethnic Studies 17 Linguistics 14 Other 9 Family and Consumer Science 7 Divinity 7 Statistics 4 Archaeology 4 Landscape Architecture 3 Librarian 2 Transportation 2 Systems Science 1 Agriculture 1 Length: 63, dtype: int64
Using the coding listed in the data dictionary:
departments = {
101000: 'General Studies',
102000: 'Liberal Arts',
200000: 'Humanities',
201000: 'History',
202000: 'Linguistics',
203000: 'Literature',
203100: 'Foreign Language',
203200: 'English',
203300: 'Composition, Rhetoric, Writing',
204000: 'Arts',
204100: 'Performing Arts',
204200: 'Visual Arts',
204300: 'Film and Media',
204400: 'Music',
205000: 'Philosophy',
206000: 'Religion',
300000: 'Social Sciences',
301000: 'Anthropology',
302000: 'Archaeology',
303000: 'Area Studies',
304000: 'Cultural and Ethnic Studies',
305000: 'Economics',
306000: 'Gender and Sexuality Studies',
307000: 'Geography',
308000: 'Political Science',
308100: 'International Relations',
309000: 'Psychology',
310000: 'Sociology',
400000: 'Science and Technology',
401000: 'Space Science',
402000: 'Earth Sciences',
403000: 'Biology',
404000: 'Chemistry',
405000: 'Physics',
406000: 'Engineering',
407000: 'Computer Sciences',
408000: 'Mathematics',
409000: 'Statistics',
410000: 'Systems Science',
600000: 'Professions and Applied sciences',
601000: 'Agriculture',
602000: 'Architecture and Design',
602100: 'Landscape Architecture',
603000: 'Business',
603100: 'Accounting',
604000: 'Divinity',
605000: 'Education',
607000: 'Environmental Studies and Forestry',
608000: 'Family and Consumer Science',
609000: 'Health Science',
609100: 'Nursing',
610000: 'Human Physical Performance and Recreation',
611000: 'Communications',
612000: 'Law',
613000: 'Library and Museum Studies',
614000: 'Military Sciences',
615000: 'Public Administration',
616000: 'Social Work',
617000: 'Transportation',
618000: 'Criminal Studies',
888888: 'Not Specified',
999999: 'Other'}
#df['division'] =
'''
divisions = {
'1': 'General Studies/Liberal Arts',
'2': 'Arts/Humanities',
'3': 'Social Sciences',
'4': 'Science/Technology',
'6': 'Professions/Applied Science',
'8': np.nan,
'9': 'Other'
}
'''
divisions = {
'1': 'Gen/LibA',
'2': 'Arts/Hum',
'3': 'SocSci',
'4': 'Sci/Tech',
'6': 'Prof/ApS',
'8': np.nan,
'9': 'Other'
}
df['division'] = df.department_type_id.apply(lambda x: str(x)[0:1]).replace(divisions)
df.groupby(['public']).payaverage.describe().unstack()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
public | ||||||||
False | 2911 | 3193.933356 | 1701.327580 | 300 | 2100 | 2820.0 | 3900.00 | 20000 |
True | 4018 | 2957.375809 | 1410.249553 | 450 | 2000 | 2567.5 | 3574.75 | 18000 |
2 rows × 8 columns
df.boxplot('payaverage', by='private not-for-profit', rot=45)
df.groupby(['private not-for-profit']).payaverage.describe().unstack()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
private not-for-profit | ||||||||
False | 4425 | 2859.963164 | 1397.415906 | 450 | 1950.00 | 2500 | 3483 | 18000 |
True | 2504 | 3404.528355 | 1718.649341 | 300 | 2278.75 | 3000 | 4000 | 20000 |
2 rows × 8 columns
df.boxplot('payaverage', by='division', rot=90)
df.groupby(['division']).payaverage.describe().unstack()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
division | ||||||||
Arts/Humanities | 3245 | 3027.400308 | 1452.950085 | 450 | 2001 | 2700 | 3700 | 15500 |
General Studies/Liberal Arts | 474 | 2681.419831 | 1318.727846 | 300 | 1800 | 2400 | 3200 | 10500 |
Other | 9 | 2656.888889 | 1209.167838 | 1138 | 1895 | 2475 | 3324 | 5000 |
Professions/Applied Science | 1203 | 3158.650873 | 1757.174617 | 450 | 2100 | 2800 | 3800 | 20000 |
Science/Technology | 691 | 3239.274964 | 1677.514657 | 713 | 2100 | 2800 | 4000 | 12000 |
Social Sciences | 1019 | 3156.970559 | 1582.704235 | 508 | 2100 | 2800 | 3800 | 12575 |
6 rows × 8 columns
import statsmodels.formula.api as sm
from pandas.stats.api import ols
pd.concat([data, pd.get_dummies(data.division, prefix="div")])
2-year | college_carnegie | college_class | college_flagship | college_hbcu | college_id | college_name | college_state | college_unit_id | contract_type | course_syllabus | course_type | degree_JD | degree_MA/MS | degree_MD | degree_None | degree_PhD | degree_nan | department_id | department_type_id | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1809 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 737 | American University | District of Columbia | 131159 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 198 | 611000 | ... |
1830 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 538 | San Jose State University | California | 122755 | NaN | NaN | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 1905 | 611000 | ... |
1920 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 737 | American University | District of Columbia | 131159 | NaN | 2 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 198 | 611000 | ... |
1921 | 0 | Research Universities--very high research acti... | 4-year public | NaN | NaN | 3340 | University of South Carolina at Columbia | South Carolina | 218663 | NaN | 3 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1985 | 611000 | ... |
1922 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 2840 | Kent State University | Ohio | 203517 | NaN | 1 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1986 | 611100 | ... |
1923 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 2840 | Kent State University | Ohio | 203517 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1986 | 611100 | ... |
1924 | 0 | Research Universities--very high research acti... | 4-year private not-for-profit | NaN | NaN | 2351 | Columbia University | New York | 190150 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1987 | 611100 | ... |
1926 | 0 | Research Universities--very high research acti... | 4-year public | NaN | NaN | 3340 | University of South Carolina at Columbia | South Carolina | 218663 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1985 | 611000 | ... |
1928 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 1041 | DePaul University | Illinois | 144740 | NaN | 3 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 1990 | 204100 | ... |
1929 | 0 | Research Universities--very high research acti... | 4-year private not-for-profit | NaN | NaN | 1027 | University of Chicago | Illinois | 144050 | NaN | 3 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 1991 | 204100 | ... |
1930 | 0 | Research Universities--very high research acti... | 4-year private not-for-profit | NaN | NaN | 1027 | University of Chicago | Illinois | 144050 | NaN | 2 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 1992 | 204000 | ... |
1931 | 0 | Research Universities--very high research acti... | 4-year public | NaN | NaN | 2691 | University of North Carolina at Chapel Hill | North Carolina | 199120 | NaN | 1 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 782 | 611100 | ... |
1932 | 0 | Research Universities--very high research acti... | 4-year public | NaN | NaN | 3832 | University of Washington | Washington | 236948 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1994 | 611000 | ... |
1933 | 0 | Baccalaureate Colleges--Arts and Sciences | 4-year private not-for-profit | NaN | NaN | 2139 | Doane College | Nebraska | 181020 | NaN | 3 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 792 | 203200 | ... |
1934 | 1 | Associates--Public Suburban-serving Single Campus | 2-year public | NaN | NaN | 4036 | Warren County Community College | New Jersey | 245625 | NaN | 1 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1995 | 200000 | ... |
1935 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 15 | University of Alabama at Tuscaloosa | Alabama | 100751 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 1996 | 200000 | ... |
1936 | 0 | Baccalaureate Colleges--Arts and Sciences | 4-year private not-for-profit | NaN | NaN | 3269 | Washington and Jefferson College | Pennsylvania | 216667 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1893 | 203200 | ... |
1937 | 0 | Research Universities--very high research acti... | 4-year private not-for-profit | NaN | NaN | 3208 | University of Pennsylvania | Pennsylvania | 215062 | NaN | 3 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 1997 | 203300 | ... |
1938 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 737 | American University | District of Columbia | 131159 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 198 | 611000 | ... |
1939 | 0 | Masters Colleges and Universities--medium prog... | 4-year private not-for-profit | NaN | NaN | 1327 | Upper Iowa University | Iowa | 154493 | NaN | 1 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 1998 | 102000 | ... |
1940 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 3936 | University of Wisconsin at Milwaukee | Wisconsin | 240453 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 1999 | 203200 | ... |
1941 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 3585 | Texas State University at San Marcos | Texas | 228459 | NaN | 3 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2000 | 203200 | ... |
1942 | 1 | Associates--Public Suburban-serving Single Campus | 2-year public | NaN | NaN | 3352 | Tri-County Technical College | South Carolina | 218885 | NaN | 0 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2001 | 203200 | ... |
1943 | 0 | Research Universities--very high research acti... | 4-year private not-for-profit | NaN | NaN | 1123 | Northwestern University | Illinois | 147767 | NaN | 2 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 2002 | 611100 | ... |
1944 | 0 | Doctoral and Research Universities | 4-year public | NaN | NaN | 1782 | Central Michigan University | Michigan | 169248 | NaN | 3 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 2003 | 611100 | ... |
1945 | 1 | Associates--Public Suburban-serving Single Campus | 2-year public | NaN | NaN | 3070 | Butler County Community College (Pa.) | Pennsylvania | 211343 | NaN | 1 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2004 | 203200 | ... |
1946 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 3147 | Lincoln University (Pa.) | Pennsylvania | 213598 | NaN | 2 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 2005 | 611000 | ... |
1947 | 0 | Masters Colleges and Universities--larger prog... | 4-year private not-for-profit | NaN | NaN | 698 | University of Hartford | Connecticut | 129525 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2006 | 605000 | ... |
1948 | 0 | Baccalaureate Colleges--Diverse Fields | 4-year private not-for-profit | NaN | NaN | 2388 | Elmira College | New York | 190983 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2007 | 102000 | ... |
1950 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 1705 | University of Massachusetts at Lowell | Massachusetts | 166513 | NaN | NaN | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2010 | 309000 | ... |
1954 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 5 | Auburn University | Alabama | 100858 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2012 | 611100 | ... |
1955 | 1 | Associates--Public Rural-serving Medium | 2-year public | NaN | NaN | 2097 | Three Rivers Community College (Mo.) | Missouri | 179645 | NaN | 1 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 2013 | 102000 | ... |
1956 | 1 | Associates--Public Urban-serving Multicampus | 2-year public | NaN | NaN | 149 | Pima Community College | Arizona | 105525 | NaN | 1 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2014 | 605000 | ... |
1957 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 1609 | University of Maryland University College | Maryland | 163204 | NaN | 0 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 613 | 407000 | ... |
1958 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 3927 | University of Wisconsin at La Crosse | Wisconsin | 240329 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2015 | 203100 | ... |
1960 | 1 | Associates--Public Rural-serving Large | 2-year public | NaN | NaN | 761 | Brevard Community College | Florida | 132693 | NaN | 2 | NaN | 0 | 0 | 0 | 1 | 0 | 0 | 2017 | 308100 | ... |
1961 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 3124 | Immaculata University | Pennsylvania | 213011 | NaN | 3 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2018 | 600000 | ... |
1963 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 757 | Barry University | Florida | 132471 | NaN | 0 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2020 | 102000 | ... |
1965 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 3718 | James Madison University | Virginia | 232423 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2023 | 603000 | ... |
1966 | 0 | Baccalaureate Colleges--Diverse Fields | 4-year private not-for-profit | NaN | NaN | 3092 | Delaware Valley College | Pennsylvania | 211981 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2024 | 603000 | ... |
1967 | 0 | Research Universities--high research activity | 4-year private not-for-profit | NaN | NaN | 3099 | Duquesne University | Pennsylvania | 212106 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2025 | 609000 | ... |
1968 | 0 | Masters Colleges and Universities--larger prog... | 4-year private not-for-profit | NaN | NaN | 2589 | Touro College (N.Y.) | New York | 196592 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2026 | 605000 | ... |
1969 | 1 | Associates--Public Urban-serving Multicampus | 2-year public | NaN | NaN | 2810 | Cuyahoga Community College | Ohio | 202356 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2027 | 203200 | ... |
1970 | 0 | Research Universities--very high research acti... | 4-year public | NaN | NaN | 3218 | University of Pittsburgh main campus | Pennsylvania | 215293 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2028 | 605000 | ... |
1971 | 0 | Associates--Public Urban-serving Multicampus | 4-year public | NaN | NaN | 762 | Broward College | Florida | 132709 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 1506 | 309000 | ... |
1972 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 3421 | University of Memphis | Tennessee | 220862 | NaN | 0 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2029 | 605000 | ... |
1974 | 0 | Masters Colleges and Universities--smaller pro... | 4-year private not-for-profit | NaN | NaN | 1682 | Emmanuel College (Mass.) | Massachusetts | 165671 | NaN | 1 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 527 | 400000 | ... |
1975 | 0 | Research Universities--very high research acti... | 4-year public | NaN | NaN | 1612 | University of Maryland at College Park | Maryland | 163286 | NaN | 3 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2031 | 204400 | ... |
1976 | 1 | Associates--Public Rural-serving Large | 2-year public | NaN | NaN | 4179 | NorthWest Arkansas Community College | Arkansas | 367459 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2032 | 407000 | ... |
1977 | 1 | Associates--Public Suburban-serving Multicampus | 2-year public | NaN | NaN | 2270 | Union County College | New Jersey | 187198 | NaN | 0 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2033 | 200000 | ... |
1978 | 0 | Doctoral and Research Universities | 4-year private not-for-profit | NaN | NaN | 2262 | Seton Hall University | New Jersey | 186584 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2034 | 201000 | ... |
1979 | 0 | Associates--Public Suburban-serving Single Campus | 4-year public | NaN | NaN | 845 | Seminole State College of Florida | Florida | 137209 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2035 | 407000 | ... |
1980 | 0 | Masters Colleges and Universities--larger prog... | 4-year private not-for-profit | NaN | NaN | 2704 | Pfeiffer University | North Carolina | 199306 | NaN | 3 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2036 | 407000 | ... |
1981 | 0 | Masters Colleges and Universities--larger prog... | 4-year public | NaN | NaN | 2242 | Kean University | New Jersey | 185262 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2037 | 308000 | ... |
1982 | 1 | Associates--Public Urban-serving Multicampus | 2-year public | NaN | NaN | 2230 | Essex County College | New Jersey | 184481 | NaN | 1 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2038 | 201000 | ... |
1983 | 0 | Research Universities--very high research acti... | 4-year private not-for-profit | NaN | NaN | 2355 | Cornell University | New York | 190415 | NaN | 2 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2039 | 602000 | ... |
1984 | 0 | Research Universities--high research activity | 4-year public | NaN | NaN | 627 | University of Colorado at Denver | Colorado | 126562 | NaN | NaN | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2040 | 605000 | ... |
1985 | 0 | Masters Colleges and Universities--larger prog... | 4-year private not-for-profit | NaN | NaN | 3231 | Robert Morris University (Pa.) | Pennsylvania | 215655 | NaN | 3 | NaN | 0 | 1 | 0 | 0 | 0 | 0 | 2041 | 605000 | ... |
1986 | 0 | Masters Colleges and Universities--larger prog... | 4-year private not-for-profit | NaN | NaN | 1750 | Simmons College | Massachusetts | 167783 | NaN | 3 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2042 | 613000 | ... |
1987 | 0 | Research Universities--high research activity | 4-year private not-for-profit | NaN | NaN | 3464 | Baylor University | Texas | 223232 | NaN | 2 | NaN | 0 | 0 | 0 | 0 | 1 | 0 | 2043 | 605000 | ... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
8364 rows × 58 columns
data = pd.concat([df, pd.get_dummies(df.has_advanced_degree, prefix='degree', dummy_na=True)], axis=1)
data = pd.concat([data, pd.get_dummies(data.division, prefix="div")], axis=1)
data.info()
data.filter(like='degree')
<class 'pandas.core.frame.DataFrame'> Int64Index: 6929 entries, 0 to 6928 Data columns (total 53 columns): department_id 6929 non-null int64 payaverage 6929 non-null int64 paytype 2593 non-null float64 user_supplied_department_name 6719 non-null object course_type 1471 non-null float64 contract_type 1724 non-null object period_term 4517 non-null float64 period_year 4517 non-null float64 has_contract 0 non-null float64 has_retirement 6224 non-null float64 has_health_insurance 6249 non-null float64 has_governance 6202 non-null float64 has_union 5797 non-null float64 union_name 4348 non-null object is_school_rep 6929 non-null int64 course_syllabus 4503 non-null float64 office_space 4429 non-null float64 student_evals 3857 non-null float64 has_advanced_degree 4532 non-null object paytype_credits 566 non-null float64 paytype_class_size 34 non-null float64 paytype_hours 216 non-null float64 paytype_weeks 216 non-null float64 paytype_courses 111 non-null float64 department_type_id 6929 non-null int64 display_name 6929 non-null object department_type_name 6929 non-null object college_class 6929 non-null object college_carnegie 6929 non-null object college_unit_id 6929 non-null int64 college_state 6929 non-null object college_id 6929 non-null int64 college_hbcu 0 non-null float64 college_flagship 0 non-null float64 college_name 6929 non-null object public 6929 non-null bool private 6929 non-null bool for-profit 6929 non-null bool private not-for-profit 6929 non-null bool division 6641 non-null object 2-year 6888 non-null float64 degree_JD 6929 non-null float64 degree_MA/MS 6929 non-null float64 degree_MD 6929 non-null float64 degree_None 6929 non-null float64 degree_PhD 6929 non-null float64 degree_nan 6929 non-null float64 div_Arts/Hum 6929 non-null float64 div_Gen/LibA 6929 non-null float64 div_Other 6929 non-null float64 div_Prof/ApS 6929 non-null float64 div_Sci/Tech 6929 non-null float64 div_SocSci 6929 non-null float64 dtypes: bool(4), float64(32), int64(6), object(11)
has_advanced_degree | degree_JD | degree_MA/MS | degree_MD | degree_None | degree_PhD | degree_nan | |
---|---|---|---|---|---|---|---|
0 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
1 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
2 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
3 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
4 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
5 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
6 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
7 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
8 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
9 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
10 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
11 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
12 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
13 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
14 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
15 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
16 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
17 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
18 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
19 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
20 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
21 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
22 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
23 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
24 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
25 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
26 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
27 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
28 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
29 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
30 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
31 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
32 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
33 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
34 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
35 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
36 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
37 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
38 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
39 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
40 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
41 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
42 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
43 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
44 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
45 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
46 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
47 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
48 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
49 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
50 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
51 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
52 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
53 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
54 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
55 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
56 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
57 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
58 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
59 | NaN | 0 | 0 | 0 | 0 | 0 | 1 |
... | ... | ... | ... | ... | ... | ... |
6929 rows × 7 columns
data.division.value_counts()
Arts/Hum 3245 Prof/ApS 1203 SocSci 1019 Sci/Tech 691 Gen/LibA 474 Other 9 dtype: int64
data['any_health_ins'] = (data.has_health_insurance.isin([1,2,3])).where(
(data.has_health_insurance.notnull()) & (data.has_health_insurance != 4))
data.filter(like='health')
has_health_insurance | any_health_ins | |
---|---|---|
1809 | 4 | NaN |
1830 | 1 | 1 |
1920 | 0 | 0 |
1921 | 0 | 0 |
1922 | 0 | 0 |
1923 | 0 | 0 |
1924 | 3 | 1 |
1926 | NaN | NaN |
1928 | 4 | NaN |
1929 | 4 | NaN |
1930 | 0 | 0 |
1931 | 0 | 0 |
1932 | 1 | 1 |
1933 | 0 | 0 |
1934 | 0 | 0 |
1935 | 0 | 0 |
1936 | 0 | 0 |
1937 | 0 | 0 |
1938 | 4 | NaN |
1939 | 0 | 0 |
1940 | 1 | 1 |
1941 | 3 | 1 |
1942 | 0 | 0 |
1943 | 0 | 0 |
1944 | 1 | 1 |
1945 | 0 | 0 |
1946 | 0 | 0 |
1947 | 0 | 0 |
1948 | 0 | 0 |
1950 | NaN | NaN |
1954 | 0 | 0 |
1955 | 0 | 0 |
1956 | 0 | 0 |
1957 | 3 | 1 |
1958 | 1 | 1 |
1960 | 0 | 0 |
1961 | 0 | 0 |
1963 | 0 | 0 |
1965 | 0 | 0 |
1966 | 0 | 0 |
1967 | 0 | 0 |
1968 | 0 | 0 |
1969 | 0 | 0 |
1970 | 0 | 0 |
1971 | 0 | 0 |
1972 | 0 | 0 |
1974 | 0 | 0 |
1975 | NaN | NaN |
1976 | 0 | 0 |
1977 | 0 | 0 |
1978 | 0 | 0 |
1979 | 0 | 0 |
1980 | 0 | 0 |
1981 | 0 | 0 |
1982 | 0 | 0 |
1983 | NaN | NaN |
1984 | NaN | NaN |
1985 | 0 | 0 |
1986 | 0 | 0 |
1987 | 0 | 0 |
... | ... |
4182 rows × 2 columns
data = data[(data.has_advanced_degree.isin(['None','MA/MS','PhD'])) &
(data.public | data['private not-for-profit']) &
(data.division.isin(['Arts/Hum', 'Prof/ApS', 'SocSci', 'Sci/Tech', 'Gen/LibA']))]
#(data.division.isin(['Arts/Humanities', 'Professions/Applied Science', 'Social Sciences',
# 'Science/Technology', 'General Studies/Liberal Arts']))]
data.filter(like='div_')
div_Arts/Hum | div_Gen/LibA | div_Other | div_Prof/ApS | div_Sci/Tech | div_SocSci | |
---|---|---|---|---|---|---|
1809 | 0 | 0 | 0 | 1 | 0 | 0 |
1830 | 0 | 0 | 0 | 1 | 0 | 0 |
1920 | 0 | 0 | 0 | 1 | 0 | 0 |
1921 | 0 | 0 | 0 | 1 | 0 | 0 |
1922 | 0 | 0 | 0 | 1 | 0 | 0 |
1923 | 0 | 0 | 0 | 1 | 0 | 0 |
1924 | 0 | 0 | 0 | 1 | 0 | 0 |
1926 | 0 | 0 | 0 | 1 | 0 | 0 |
1928 | 1 | 0 | 0 | 0 | 0 | 0 |
1929 | 1 | 0 | 0 | 0 | 0 | 0 |
1930 | 1 | 0 | 0 | 0 | 0 | 0 |
1931 | 0 | 0 | 0 | 1 | 0 | 0 |
1932 | 0 | 0 | 0 | 1 | 0 | 0 |
1933 | 1 | 0 | 0 | 0 | 0 | 0 |
1934 | 1 | 0 | 0 | 0 | 0 | 0 |
1935 | 1 | 0 | 0 | 0 | 0 | 0 |
1936 | 1 | 0 | 0 | 0 | 0 | 0 |
1937 | 1 | 0 | 0 | 0 | 0 | 0 |
1938 | 0 | 0 | 0 | 1 | 0 | 0 |
1939 | 0 | 1 | 0 | 0 | 0 | 0 |
1940 | 1 | 0 | 0 | 0 | 0 | 0 |
1941 | 1 | 0 | 0 | 0 | 0 | 0 |
1942 | 1 | 0 | 0 | 0 | 0 | 0 |
1943 | 0 | 0 | 0 | 1 | 0 | 0 |
1944 | 0 | 0 | 0 | 1 | 0 | 0 |
1945 | 1 | 0 | 0 | 0 | 0 | 0 |
1946 | 0 | 0 | 0 | 1 | 0 | 0 |
1947 | 0 | 0 | 0 | 1 | 0 | 0 |
1948 | 0 | 1 | 0 | 0 | 0 | 0 |
1950 | 0 | 0 | 0 | 0 | 0 | 1 |
1954 | 0 | 0 | 0 | 1 | 0 | 0 |
1955 | 0 | 1 | 0 | 0 | 0 | 0 |
1956 | 0 | 0 | 0 | 1 | 0 | 0 |
1957 | 0 | 0 | 0 | 0 | 1 | 0 |
1958 | 1 | 0 | 0 | 0 | 0 | 0 |
1960 | 0 | 0 | 0 | 0 | 0 | 1 |
1961 | 0 | 0 | 0 | 1 | 0 | 0 |
1963 | 0 | 1 | 0 | 0 | 0 | 0 |
1965 | 0 | 0 | 0 | 1 | 0 | 0 |
1966 | 0 | 0 | 0 | 1 | 0 | 0 |
1967 | 0 | 0 | 0 | 1 | 0 | 0 |
1968 | 0 | 0 | 0 | 1 | 0 | 0 |
1969 | 1 | 0 | 0 | 0 | 0 | 0 |
1970 | 0 | 0 | 0 | 1 | 0 | 0 |
1971 | 0 | 0 | 0 | 0 | 0 | 1 |
1972 | 0 | 0 | 0 | 1 | 0 | 0 |
1974 | 0 | 0 | 0 | 0 | 1 | 0 |
1975 | 1 | 0 | 0 | 0 | 0 | 0 |
1976 | 0 | 0 | 0 | 0 | 1 | 0 |
1977 | 1 | 0 | 0 | 0 | 0 | 0 |
1978 | 1 | 0 | 0 | 0 | 0 | 0 |
1979 | 0 | 0 | 0 | 0 | 1 | 0 |
1980 | 0 | 0 | 0 | 0 | 1 | 0 |
1981 | 0 | 0 | 0 | 0 | 0 | 1 |
1982 | 1 | 0 | 0 | 0 | 0 | 0 |
1983 | 0 | 0 | 0 | 1 | 0 | 0 |
1984 | 0 | 0 | 0 | 1 | 0 | 0 |
1985 | 0 | 0 | 0 | 1 | 0 | 0 |
1986 | 0 | 0 | 0 | 1 | 0 | 0 |
1987 | 0 | 0 | 0 | 1 | 0 | 0 |
... | ... | ... | ... | ... | ... |
4182 rows × 6 columns
y = data.payaverage
log_y = y.apply(np.log)
x = data[['degree_MA/MS', 'degree_PhD', '2-year', 'public', 'has_union',
'div_Arts/Hum', 'div_Prof/ApS', 'div_SocSci', 'div_Sci/Tech', 'any_health_ins']]
x2 = data[['degree_MA/MS', 'degree_PhD', '2-year', 'public', 'has_union',
'div_Arts/Hum', 'div_Prof/ApS', 'div_SocSci', 'div_Sci/Tech']]
#'divis_Arts/Humanities', 'divis_Professions/Applied Science', 'divis_Social Sciences', 'divis_Science/Technology']]
x.info()
res = ols(y=y, x=x)
res2 = ols(y=log_y, x=x)
<class 'pandas.core.frame.DataFrame'> Int64Index: 4182 entries, 1809 to 6927 Data columns (total 10 columns): degree_MA/MS 4182 non-null float64 degree_PhD 4182 non-null float64 2-year 4182 non-null float64 public 4182 non-null bool has_union 3533 non-null float64 div_Arts/Hum 4182 non-null float64 div_Prof/ApS 4182 non-null float64 div_SocSci 4182 non-null float64 div_Sci/Tech 4182 non-null float64 any_health_ins 3663 non-null float64 dtypes: bool(1), float64(9)
print res
-------------------------Summary of Regression Analysis------------------------- Formula: Y ~ <degree_MA/MS> + <degree_PhD> + <2-year> + <public> + <has_union> + <div_Arts/Hum> + <div_Prof/ApS> + <div_SocSci> + <div_Sci/Tech> + <any_health_ins> + <intercept> Number of Observations: 3225 Number of Degrees of Freedom: 11 R-squared: 0.1861 Adj R-squared: 0.1836 Rmse: 1357.0738 F-stat (10, 3214): 73.5015, p-value: 0.0000 Degrees of Freedom: model 10, resid 3214 -----------------------Summary of Estimated Coefficients------------------------ Variable Coef Std Err t-stat p-value CI 2.5% CI 97.5% -------------------------------------------------------------------------------- degree_MA/MS -249.2784 116.1686 -2.15 0.0320 -476.9690 -21.5879 degree_PhD 270.8295 119.9225 2.26 0.0240 35.7813 505.8776 2-year -786.0221 62.2996 -12.62 0.0000 -908.1294 -663.9149 public -147.7148 57.5558 -2.57 0.0103 -260.5242 -34.9054 has_union -554.4217 377.8007 -1.47 0.1423 -1294.9111 186.0677 -------------------------------------------------------------------------------- div_Arts/Hum 206.9524 104.4024 1.98 0.0475 2.3237 411.5810 div_Prof/ApS 266.6175 113.3500 2.35 0.0187 44.4516 488.7835 div_SocSci 159.1791 116.6479 1.36 0.1725 -69.4508 387.8090 div_Sci/Tech 345.5963 122.0551 2.83 0.0047 106.3682 584.8244 any_health_ins 1014.7623 59.6721 17.01 0.0000 897.8051 1131.7196 -------------------------------------------------------------------------------- intercept 3585.8706 400.6223 8.95 0.0000 2800.6508 4371.0903 ---------------------------------End of Summary---------------------------------
print res2
-------------------------Summary of Regression Analysis------------------------- Formula: Y ~ <degree_MA/MS> + <degree_PhD> + <2-year> + <public> + <has_union> + <div_Arts/Hum> + <div_Prof/ApS> + <div_SocSci> + <div_Sci/Tech> + <any_health_ins> + <intercept> Number of Observations: 3225 Number of Degrees of Freedom: 11 R-squared: 0.2038 Adj R-squared: 0.2014 Rmse: 0.3978 F-stat (10, 3214): 82.2899, p-value: 0.0000 Degrees of Freedom: model 10, resid 3214 -----------------------Summary of Estimated Coefficients------------------------ Variable Coef Std Err t-stat p-value CI 2.5% CI 97.5% -------------------------------------------------------------------------------- degree_MA/MS -0.0522 0.0341 -1.53 0.1254 -0.1189 0.0145 degree_PhD 0.0956 0.0352 2.72 0.0066 0.0267 0.1645 2-year -0.2904 0.0183 -15.91 0.0000 -0.3262 -0.2547 public -0.0221 0.0169 -1.31 0.1897 -0.0552 0.0109 has_union -0.1075 0.1107 -0.97 0.3316 -0.3246 0.1095 -------------------------------------------------------------------------------- div_Arts/Hum 0.0776 0.0306 2.54 0.0113 0.0176 0.1376 div_Prof/ApS 0.0955 0.0332 2.87 0.0041 0.0303 0.1606 div_SocSci 0.0719 0.0342 2.10 0.0356 0.0049 0.1389 div_Sci/Tech 0.1216 0.0358 3.40 0.0007 0.0514 0.1917 any_health_ins 0.2805 0.0175 16.03 0.0000 0.2462 0.3147 -------------------------------------------------------------------------------- intercept 7.9976 0.1174 68.11 0.0000 7.7675 8.2278 ---------------------------------End of Summary---------------------------------
ols(y=log_y, x=x2)
-------------------------Summary of Regression Analysis------------------------- Formula: Y ~ <degree_MA/MS> + <degree_PhD> + <2-year> + <public> + <has_union> + <div_Arts/Hum> + <div_Prof/ApS> + <div_SocSci> + <div_Sci/Tech> + <intercept> Number of Observations: 3533 Number of Degrees of Freedom: 10 R-squared: 0.1433 Adj R-squared: 0.1411 Rmse: 0.4158 F-stat (9, 3523): 65.4917, p-value: 0.0000 Degrees of Freedom: model 9, resid 3523 -----------------------Summary of Estimated Coefficients------------------------ Variable Coef Std Err t-stat p-value CI 2.5% CI 97.5% -------------------------------------------------------------------------------- degree_MA/MS -0.0839 0.0339 -2.48 0.0133 -0.1504 -0.0175 degree_PhD 0.0759 0.0349 2.17 0.0299 0.0074 0.1444 2-year -0.3234 0.0182 -17.80 0.0000 -0.3590 -0.2878 public 0.0326 0.0164 1.99 0.0471 0.0004 0.0648 has_union -0.0659 0.0957 -0.69 0.4910 -0.2535 0.1217 -------------------------------------------------------------------------------- div_Arts/Hum 0.0923 0.0310 2.98 0.0029 0.0316 0.1530 div_Prof/ApS 0.0996 0.0336 2.96 0.0031 0.0337 0.1654 div_SocSci 0.0905 0.0344 2.63 0.0086 0.0231 0.1580 div_Sci/Tech 0.1290 0.0359 3.59 0.0003 0.0586 0.1995 intercept 8.0078 0.1041 76.94 0.0000 7.8038 8.2118 ---------------------------------End of Summary---------------------------------
import statsmodels.formula.api as smf
data.has_advanced_degree.value_counts()
MA/MS 2512 PhD 1477 None 193 dtype: int64
results = smf.ols("log(payaverage) ~ C(has_advanced_degree, Treatment(reference='None'))" \
"*C(division, Treatment(reference='Gen/LibA')) + has_union + public + two_year + C(has_health_insurance)", data=data).fit()
results.summary()
Dep. Variable: | log(payaverage) | R-squared: | 0.209 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.204 |
Method: | Least Squares | F-statistic: | 42.67 |
Date: | Wed, 30 Apr 2014 | Prob (F-statistic): | 3.55e-155 |
Time: | 17:21:03 | Log-Likelihood: | -1689.2 |
No. Observations: | 3406 | AIC: | 3422. |
Df Residuals: | 3384 | BIC: | 3557. |
Df Model: | 21 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 7.6717 | 0.143 | 53.748 | 0.000 | 7.392 7.952 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS] | 0.2110 | 0.113 | 1.871 | 0.061 | -0.010 0.432 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD] | 0.4117 | 0.118 | 3.492 | 0.000 | 0.181 0.643 |
C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] | 0.3471 | 0.119 | 2.923 | 0.003 | 0.114 0.580 |
C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] | 0.4846 | 0.121 | 3.990 | 0.000 | 0.246 0.723 |
C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] | 0.2858 | 0.130 | 2.201 | 0.028 | 0.031 0.540 |
C(division, Treatment(reference='Gen/LibA'))[T.SocSci] | 0.4744 | 0.161 | 2.953 | 0.003 | 0.159 0.789 |
public[T.True] | -0.0296 | 0.017 | -1.788 | 0.074 | -0.062 0.003 |
C(has_health_insurance)[T.1.0] | 0.3326 | 0.026 | 12.850 | 0.000 | 0.282 0.383 |
C(has_health_insurance)[T.2.0] | 0.2882 | 0.032 | 9.091 | 0.000 | 0.226 0.350 |
C(has_health_insurance)[T.3.0] | 0.2145 | 0.027 | 7.889 | 0.000 | 0.161 0.268 |
C(has_health_insurance)[T.4.0] | 0.0795 | 0.031 | 2.576 | 0.010 | 0.019 0.140 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] | -0.2731 | 0.125 | -2.187 | 0.029 | -0.518 -0.028 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] | -0.3434 | 0.130 | -2.639 | 0.008 | -0.599 -0.088 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] | -0.4019 | 0.129 | -3.127 | 0.002 | -0.654 -0.150 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] | -0.4614 | 0.134 | -3.431 | 0.001 | -0.725 -0.198 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] | -0.1913 | 0.138 | -1.386 | 0.166 | -0.462 0.079 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] | -0.1766 | 0.142 | -1.241 | 0.215 | -0.456 0.103 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] | -0.4115 | 0.167 | -2.471 | 0.014 | -0.738 -0.085 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] | -0.4584 | 0.170 | -2.693 | 0.007 | -0.792 -0.125 |
has_union | -0.0368 | 0.095 | -0.389 | 0.697 | -0.222 0.149 |
two_year | -0.2808 | 0.018 | -15.451 | 0.000 | -0.316 -0.245 |
Omnibus: | 121.189 | Durbin-Watson: | 1.851 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 313.464 |
Skew: | -0.133 | Prob(JB): | 8.55e-69 |
Kurtosis: | 4.462 | Cond. No. | 121. |
res2 = smf.ols("log(payaverage) ~ has_advanced_degree*(division) + C(two_year)", data=data).fit()
res2.summary()
Dep. Variable: | log(payaverage) | R-squared: | 0.147 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.144 |
Method: | Least Squares | F-statistic: | 47.97 |
Date: | Wed, 30 Apr 2014 | Prob (F-statistic): | 6.04e-132 |
Time: | 17:17:40 | Log-Likelihood: | -2284.5 |
No. Observations: | 4182 | AIC: | 4601. |
Df Residuals: | 4166 | BIC: | 4702. |
Df Model: | 15 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 7.9780 | 0.013 | 635.700 | 0.000 | 7.953 8.003 |
has_advanced_degree[T.None] | 0.0528 | 0.052 | 1.015 | 0.310 | -0.049 0.155 |
has_advanced_degree[T.PhD] | 0.1468 | 0.021 | 7.113 | 0.000 | 0.106 0.187 |
division[T.Gen/LibA] | -0.0966 | 0.037 | -2.624 | 0.009 | -0.169 -0.024 |
division[T.Prof/ApS] | -0.0030 | 0.023 | -0.131 | 0.896 | -0.048 0.042 |
division[T.Sci/Tech] | -5.09e-05 | 0.030 | -0.002 | 0.999 | -0.060 0.060 |
division[T.SocSci] | -0.0180 | 0.025 | -0.712 | 0.476 | -0.067 0.032 |
C(two_year)[T.1.0] | -0.3035 | 0.015 | -20.138 | 0.000 | -0.333 -0.274 |
has_advanced_degree[T.None]:division[T.Gen/LibA] | -0.1612 | 0.119 | -1.350 | 0.177 | -0.395 0.073 |
has_advanced_degree[T.PhD]:division[T.Gen/LibA] | 0.0722 | 0.063 | 1.149 | 0.251 | -0.051 0.195 |
has_advanced_degree[T.None]:division[T.Prof/ApS] | 0.2017 | 0.079 | 2.568 | 0.010 | 0.048 0.356 |
has_advanced_degree[T.PhD]:division[T.Prof/ApS] | 0.0022 | 0.039 | 0.056 | 0.955 | -0.074 0.078 |
has_advanced_degree[T.None]:division[T.Sci/Tech] | -0.0794 | 0.089 | -0.888 | 0.374 | -0.255 0.096 |
has_advanced_degree[T.PhD]:division[T.Sci/Tech] | 0.0914 | 0.045 | 2.047 | 0.041 | 0.004 0.179 |
has_advanced_degree[T.None]:division[T.SocSci] | 0.1447 | 0.134 | 1.084 | 0.278 | -0.117 0.406 |
has_advanced_degree[T.PhD]:division[T.SocSci] | 0.0382 | 0.038 | 0.994 | 0.320 | -0.037 0.114 |
Omnibus: | 113.848 | Durbin-Watson: | 1.833 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 281.411 |
Skew: | 0.014 | Prob(JB): | 7.81e-62 |
Kurtosis: | 4.270 | Cond. No. | 27.1 |
data.rename(columns={'2_year':'two_year'}, inplace=True)
data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4182 entries, 1809 to 6927 Data columns (total 54 columns): department_id 4182 non-null int64 payaverage 4182 non-null int64 paytype 662 non-null float64 user_supplied_department_name 4177 non-null object course_type 0 non-null float64 contract_type 0 non-null object period_term 4121 non-null float64 period_year 4121 non-null float64 has_contract 0 non-null float64 has_retirement 3878 non-null float64 has_health_insurance 3914 non-null float64 has_governance 3897 non-null float64 has_union 3533 non-null float64 union_name 3515 non-null object is_school_rep 4182 non-null int64 course_syllabus 4083 non-null float64 office_space 4003 non-null float64 student_evals 3483 non-null float64 has_advanced_degree 4182 non-null object paytype_credits 486 non-null float64 paytype_class_size 18 non-null float64 paytype_hours 168 non-null float64 paytype_weeks 168 non-null float64 paytype_courses 103 non-null float64 department_type_id 4182 non-null int64 display_name 4182 non-null object department_type_name 4182 non-null object college_class 4182 non-null object college_carnegie 4182 non-null object college_unit_id 4182 non-null int64 college_state 4182 non-null object college_id 4182 non-null int64 college_hbcu 0 non-null float64 college_flagship 0 non-null float64 college_name 4182 non-null object public 4182 non-null bool private 4182 non-null bool for-profit 4182 non-null bool private not-for-profit 4182 non-null bool division 4182 non-null object 2_year 4182 non-null float64 degree_JD 4182 non-null float64 degree_MA/MS 4182 non-null float64 degree_MD 4182 non-null float64 degree_None 4182 non-null float64 degree_PhD 4182 non-null float64 degree_nan 4182 non-null float64 div_Arts/Hum 4182 non-null float64 div_Gen/LibA 4182 non-null float64 div_Other 4182 non-null float64 div_Prof/ApS 4182 non-null float64 div_Sci/Tech 4182 non-null float64 div_SocSci 4182 non-null float64 any_health_ins 3663 non-null float64 dtypes: bool(4), float64(33), int64(6), object(11)
result.t_test()
data.payaverage.apply(np.log).plot(kind='kde')
<matplotlib.axes.AxesSubplot at 0x113de5b90>
results.params.index
Index([u'Intercept', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]', u'C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum]', u'C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS]', u'C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech]', u'C(division, Treatment(reference='Gen/LibA'))[T.SocSci]', u'public[T.True]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech]', u'C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci]', u'C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci]', u'has_union', u'two_year', u'public[T.True]:two_year'], dtype='object')
results = smf.ols(
"log(payaverage) ~ "\
"C(has_advanced_degree, Treatment(reference='None'))*C(division, Treatment(reference='Gen/LibA'))"\
"+ has_union + C(college_class) + any_health_ins"
"+ course_syllabus"
, data=data).fit()
results.summary()
Dep. Variable: | log(payaverage) | R-squared: | 0.242 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.237 |
Method: | Least Squares | F-statistic: | 48.38 |
Date: | Wed, 30 Apr 2014 | Prob (F-statistic): | 1.30e-173 |
Time: | 17:39:15 | Log-Likelihood: | -1506.9 |
No. Observations: | 3197 | AIC: | 3058. |
Df Residuals: | 3175 | BIC: | 3191. |
Df Model: | 21 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 7.4011 | 0.208 | 35.666 | 0.000 | 6.994 7.808 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS] | 0.1916 | 0.110 | 1.737 | 0.083 | -0.025 0.408 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD] | 0.3947 | 0.115 | 3.422 | 0.001 | 0.169 0.621 |
C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] | 0.2930 | 0.117 | 2.505 | 0.012 | 0.064 0.522 |
C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] | 0.4776 | 0.119 | 4.021 | 0.000 | 0.245 0.710 |
C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] | 0.3658 | 0.129 | 2.830 | 0.005 | 0.112 0.619 |
C(division, Treatment(reference='Gen/LibA'))[T.SocSci] | 0.4481 | 0.161 | 2.782 | 0.005 | 0.132 0.764 |
C(college_class)[T.2-year private not-for-profit] | -0.2048 | 0.222 | -0.923 | 0.356 | -0.640 0.230 |
C(college_class)[T.2-year public] | -0.1448 | 0.139 | -1.044 | 0.296 | -0.417 0.127 |
C(college_class)[T.4-year private not-for-profit] | 0.1383 | 0.138 | 0.999 | 0.318 | -0.133 0.410 |
C(college_class)[T.4-year public] | 0.1093 | 0.139 | 0.788 | 0.431 | -0.162 0.381 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] | -0.2057 | 0.123 | -1.670 | 0.095 | -0.447 0.036 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Arts/Hum] | -0.2917 | 0.128 | -2.274 | 0.023 | -0.543 -0.040 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] | -0.3719 | 0.126 | -2.950 | 0.003 | -0.619 -0.125 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Prof/ApS] | -0.4369 | 0.132 | -3.308 | 0.001 | -0.696 -0.178 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] | -0.2224 | 0.137 | -1.618 | 0.106 | -0.492 0.047 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.Sci/Tech] | -0.2467 | 0.142 | -1.739 | 0.082 | -0.525 0.031 |
C(has_advanced_degree, Treatment(reference='None'))[T.MA/MS]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] | -0.3961 | 0.167 | -2.371 | 0.018 | -0.724 -0.069 |
C(has_advanced_degree, Treatment(reference='None'))[T.PhD]:C(division, Treatment(reference='Gen/LibA'))[T.SocSci] | -0.4560 | 0.171 | -2.673 | 0.008 | -0.791 -0.121 |
has_union | -0.0844 | 0.113 | -0.748 | 0.455 | -0.306 0.137 |
any_health_ins | 0.2665 | 0.017 | 15.441 | 0.000 | 0.233 0.300 |
course_syllabus | 0.0973 | 0.008 | 11.924 | 0.000 | 0.081 0.113 |
Omnibus: | 143.220 | Durbin-Watson: | 1.832 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 411.572 |
Skew: | -0.173 | Prob(JB): | 4.25e-90 |
Kurtosis: | 4.723 | Cond. No. | 172. |
res3 = smf.ols(
"log(payaverage) ~ college_carnegie", data=data).fit()
res3.summary()
Dep. Variable: | log(payaverage) | R-squared: | 0.294 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.288 |
Method: | Least Squares | F-statistic: | 50.80 |
Date: | Wed, 30 Apr 2014 | Prob (F-statistic): | 5.26e-283 |
Time: | 17:35:54 | Log-Likelihood: | -1889.6 |
No. Observations: | 4182 | AIC: | 3849. |
Df Residuals: | 4147 | BIC: | 4071. |
Df Model: | 34 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 7.6848 | 0.382 | 20.129 | 0.000 | 6.936 8.433 |
college_carnegie[T.Associate's--Public Rural-serving Medium] | 0.1854 | 0.418 | 0.443 | 0.658 | -0.635 1.005 |
college_carnegie[T.Associate's--Public Suburban-serving Multicampus_] | 0.1166 | 0.399 | 0.292 | 0.770 | -0.665 0.898 |
college_carnegie[T.Associate's--Public Urban-serving Multicampus] | -0.1892 | 0.540 | -0.350 | 0.726 | -1.248 0.869 |
college_carnegie[T.Associates--Private Not-for-profit] | -0.2084 | 0.399 | -0.523 | 0.601 | -0.990 0.573 |
college_carnegie[T.Associates--Private Not-for-profit 4-year Primarily Associates] | -0.0931 | 0.412 | -0.226 | 0.821 | -0.902 0.715 |
college_carnegie[T.Associates--Public 2-year colleges under 4-year universities] | 0.2094 | 0.396 | 0.529 | 0.597 | -0.567 0.986 |
college_carnegie[T.Associates--Public 4-year Primarily Associates] | -0.0373 | 0.386 | -0.097 | 0.923 | -0.793 0.719 |
college_carnegie[T.Associates--Public Rural-serving Large] | -0.0528 | 0.383 | -0.138 | 0.890 | -0.803 0.697 |
college_carnegie[T.Associates--Public Rural-serving Medium] | -0.1522 | 0.383 | -0.398 | 0.691 | -0.903 0.598 |
college_carnegie[T.Associates--Public Rural-serving Small] | -0.1306 | 0.394 | -0.331 | 0.741 | -0.904 0.642 |
college_carnegie[T.Associates--Public Special Use] | -0.4752 | 0.408 | -1.164 | 0.244 | -1.275 0.325 |
college_carnegie[T.Associates--Public Suburban-serving Multicampus] | 0.1881 | 0.383 | 0.491 | 0.623 | -0.563 0.939 |
college_carnegie[T.Associates--Public Suburban-serving Single Campus] | 0.0920 | 0.383 | 0.240 | 0.810 | -0.659 0.843 |
college_carnegie[T.Associates--Public Urban-serving Multicampus] | 0.0513 | 0.383 | 0.134 | 0.893 | -0.699 0.801 |
college_carnegie[T.Associates--Public Urban-serving Single Campus] | 0.0110 | 0.384 | 0.029 | 0.977 | -0.741 0.763 |
college_carnegie[T.Baccalaureate Colleges--Arts and Sciences] | 0.3979 | 0.383 | 1.039 | 0.299 | -0.353 1.148 |
college_carnegie[T.Baccalaureate Colleges--Diverse Fields] | 0.0938 | 0.383 | 0.245 | 0.806 | -0.657 0.844 |
college_carnegie[T.Baccalaureate and Associates Colleges] | -0.1336 | 0.387 | -0.346 | 0.730 | -0.891 0.624 |
college_carnegie[T.Doctoral and Research Universities] | 0.3410 | 0.383 | 0.891 | 0.373 | -0.409 1.091 |
college_carnegie[T.Master's Colleges and Universities (medium programs)] | 0.3166 | 0.540 | 0.586 | 0.558 | -0.742 1.375 |
college_carnegie[T.Master's Colleges and Universities--larger programs] | -0.1568 | 0.418 | -0.375 | 0.708 | -0.977 0.663 |
college_carnegie[T.Master's Colleges and Universities--medium programs] | 0.4761 | 0.394 | 1.207 | 0.227 | -0.297 1.249 |
college_carnegie[T.Masters Colleges and Universities--larger programs] | 0.2871 | 0.382 | 0.752 | 0.452 | -0.462 1.036 |
college_carnegie[T.Masters Colleges and Universities--medium programs] | 0.2151 | 0.383 | 0.562 | 0.574 | -0.535 0.965 |
college_carnegie[T.Masters Colleges and Universities--smaller programs] | 0.1599 | 0.384 | 0.417 | 0.677 | -0.592 0.912 |
college_carnegie[T.Other health professions schools] | 0.3096 | 0.394 | 0.787 | 0.431 | -0.462 1.081 |
college_carnegie[T.Other technology-related schools] | -0.5263 | 0.441 | -1.194 | 0.233 | -1.391 0.338 |
college_carnegie[T.Research Universities--high research activity] | 0.4150 | 0.382 | 1.086 | 0.278 | -0.335 1.165 |
college_carnegie[T.Research Universities--very high research activity] | 0.7037 | 0.382 | 1.842 | 0.066 | -0.045 1.453 |
college_carnegie[T.Schools of art- music- and design] | 0.5276 | 0.385 | 1.372 | 0.170 | -0.226 1.281 |
college_carnegie[T.Schools of business and management] | -0.3893 | 0.399 | -0.976 | 0.329 | -1.171 0.392 |
college_carnegie[T.Schools of engineering] | 0.3219 | 0.441 | 0.730 | 0.465 | -0.542 1.186 |
college_carnegie[T.Theological seminaries- Bible colleges- and other faith-related institutions] | -0.0367 | 0.400 | -0.092 | 0.927 | -0.822 0.748 |
college_carnegie[T.Tribal Colleges] | -0.2327 | 0.468 | -0.498 | 0.619 | -1.149 0.684 |
Omnibus: | 150.798 | Durbin-Watson: | 1.851 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 440.628 |
Skew: | -0.017 | Prob(JB): | 2.08e-96 |
Kurtosis: | 4.590 | Cond. No. | 402. |
data.college_carnegie.value_counts()
Masters Colleges and Universities--larger programs 866 Research Universities--very high research activity 612 Research Universities--high research activity 364 Associates--Public Urban-serving Multicampus 252 Doctoral and Research Universities 232 Associates--Public Rural-serving Large 229 Masters Colleges and Universities--medium programs 226 Baccalaureate Colleges--Diverse Fields 199 Baccalaureate Colleges--Arts and Sciences 187 Associates--Public Rural-serving Medium 176 Associates--Public Suburban-serving Multicampus 170 Associates--Public Suburban-serving Single Campus 166 Masters Colleges and Universities--smaller programs 104 Associates--Public Urban-serving Single Campus 104 Schools of art- music- and design 69 Associates--Public 4-year Primarily Associates 50 Baccalaureate and Associates Colleges 40 Other health professions schools 16 Associates--Public Rural-serving Small 15 Master's Colleges and Universities--medium programs 15 Associates--Public 2-year colleges under 4-year universities 13 Associate's--Public Suburban-serving Multicampus_ 11 Schools of business and management 11 Associates--Private Not-for-profit 11 Theological seminaries- Bible colleges- and other faith-related institutions 10 Associates--Public Special Use 7 Associates--Private Not-for-profit 4-year Primarily Associates 6 Master's Colleges and Universities--larger programs 5 Associate's--Public Rural-serving Medium 5 Other technology-related schools 3 Schools of engineering 3 Tribal Colleges 2 Master's Colleges and Universities (medium programs) 1 Associate's--Public Urban-serving Multicampus 1 Associate's--Public Rural-serving Large 1 dtype: int64
pd.pivot_table(data, rows='union_name', aggfunc={'has_union': 'count'})
has_union | |
---|---|
union_name | |
AAUP | 92 |
AFSCME | 13 |
AFT | 177 |
NEA | 82 |
None | 2783 |
Other | 309 |
SEIU | 58 |
7 rows × 1 columns
ipeds = pd.read_csv('data/ipeds2012/CSV_512014-1015/CSV_512014-1015.csv')
ipeds.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 7330 entries, 0 to 7329 Data columns (total 161 columns): unitid int64 institution name object year int64 DRVIC2012.Percent admitted - total float64 DRVIC2012.Admissions yield - total float64 DRVIC2012.Tuition and fees, 2009-10 float64 DRVIC2012.Tuition and fees, 2010-11 float64 DRVIC2012.Tuition and fees, 2011-12 float64 DRVIC2012.Tuition and fees, 2012-13 float64 HD2012.Institution size category object HD2012.Geographic region object HD2012.Sector of institution object HD2012.Level of institution object HD2012.Control of institution object HD2012.Degree-granting status object HD2012.Historically Black College or University object HD2012.Degree of urbanization (Urban-centric locale) object HD2012.Institutional category object DRVEF2012.Undergraduate enrollment float64 DRVEF2012.Graduate enrollment float64 DRVEF2012.Full-time undergraduate enrollment float64 DRVEF2012.Part-time undergraduate enrollment float64 DRVEF2012.Percent of total enrollment that are American Indian or Alaska Native float64 DRVEF2012.Percent of total enrollment that are Asian float64 DRVEF2012.Percent of total enrollment that are Black or African American float64 DRVEF2012.Percent of total enrollment that are Hispanic/Latino float64 DRVEF2012.Percent of total enrollment that are Native Hawaiian or Other Pacific Islander float64 DRVEF2012.Percent of total enrollment that are White float64 DRVEF2012.Percent of total enrollment that are two or more races float64 DRVEF2012.Percent of total enrollment that are Race/ethnicity unknown float64 DRVEF2012.Percent of total enrollment that are Nonresident Alien float64 DRVEF2012.Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander float64 DRVEF2012.Percent of total enrollment that are women float64 EF2012D.Student-to-faculty ratio float64 EF2012D.Full-time retention rate, 2012 float64 EF2012D.Part-time retention rate, 2012 float64 DRVEF2012.Percent of undergraduate enrollment 18-24 float64 DRVEF2012.Percent of undergraduate enrollment, 25-64 float64 DRVEF2012.Percent of undergraduate enrollment over 65 float64 DRVEFDE2012.Percent of students enrolled exclusively in distance education courses float64 DRVEFDE2012.Percent of students enrolled in some but not all distance education courses float64 DRVEFDE2012.Percent of students not enrolled in any distance education courses float64 DRVGR2012.Graduation rate, total cohort float64 DRVGR2012.Transfer-out rate, total cohort float64 GR200_12.Graduation rate - degree/certificate within 100% of normal time float64 DRVGR2012.Graduation rate - bachelor's degree within 4 years, total float64 SFA1112.Percent of full-time first-time undergraduates receiving any financial aid float64 SFA1112.Percent of full-time first-time undergraduates receiving federal, state, local or institutional grant aid float64 SFA1112.Average amount of federal, state, local or institutional grant aid received float64 DRVF2012.Core revenues, total dollars (GASB) float64 DRVF2012.Tuition and fees as a percent of core revenues (GASB) float64 DRVF2012.State appropriations as percent of core revenues (GASB) float64 DRVF2012.Local appropriations as a percent of core revenues (GASB) float64 DRVF2012.Government grants and contracts as a percent of core revenues (GASB) float64 DRVF2012.Private gifts, grants, and contracts as a percent of core revenues (GASB) float64 DRVF2012.Investment return as a percent of core revenues (GASB) float64 DRVF2012.Other revenues as a percent of core revenues (GASB) float64 DRVF2012.Core revenues, total dollars (FASB) float64 DRVF2012.Tuition and fees as a percent of core revenues (FASB) float64 DRVF2012.Government grants and contracts as a percent of core revenues (FASB) float64 DRVF2012.Private gifts, grants, contracts/contributions from affiliated entities as a percent of core revenues (FASB) float64 DRVF2012.Investment return as a percent of core revenues (FASB) float64 DRVF2012.Other revenues as a percent of core revenues (FASB) float64 DRVF2012.Core revenues, total dollars (for-profit institutions) float64 DRVF2012.Tuition and fees as a percent of core revenues (for-profit institutions) float64 DRVF2012.Govenment appropriations, grants, and contracts as a percent of core revenues (for-profit institutions) float64 DRVF2012.Sales and services of educational activities as a percent of core revenues (for-profit institutions) float64 DRVF2012.Other revenues as a percent of core revenues (for-profit institutions) float64 DRVF2012.Revenues from tuition and fees per FTE (GASB) float64 DRVF2012.Revenues from tuition and fees per FTE (FASB) float64 DRVF2012.Core expenses, total dollars (GASB) float64 DRVF2012.Instruction expenses as a percent of total core expenses (GASB) float64 DRVF2012.Research expenses as a percent of total core expenses (GASB) float64 DRVF2012.Public service expenses as a percent of total core expenses (GASB) float64 DRVF2012.Academic support expenses as a percent of total core expenses (GASB) float64 DRVF2012.Student service expenses as a percent of total core expenses (GASB) float64 DRVF2012.Institutional support expenses as a percent of total core expenses (GASB) float64 DRVF2012.Other core expenses as a percent of total core expenses (GASB) float64 DRVF2012.Core expenses, total dollars (FASB) float64 DRVF2012.Instruction expenses as a percent of total core expenses (FASB) float64 DRVF2012.Research expenses as a percent of total core expenses (FASB) float64 DRVF2012.Public service expenses as a percent of total core expenses (FASB) float64 DRVF2012.Academic support expenses as a percent of total core expenses (FASB) float64 DRVF2012.Student service expenses as a percent of total core expenses (FASB) float64 DRVF2012.Institutional support expenses as a percent of total core expenses (FASB) float64 DRVF2012.Other core expenses as a percent of total core expenses (FASB) float64 DRVF2012.Core expenses, total dollars (for-profit institutons) float64 DRVF2012.Instruction expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Academic and institutional support, and student service expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Other core expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Instruction expenses per FTE (GASB) float64 DRVF2012.Research expenses per FTE (GASB) float64 DRVF2012.Public service expenses per FTE (GASB) float64 DRVF2012.Academic support expenses per FTE (GASB) float64 DRVF2012.Student service expenses per FTE (GASB) float64 DRVF2012.Institutional support expenses per FTE (GASB) float64 DRVF2012.All other core expenses per FTE (GASB) float64 DRVF2012.Instruction expenses per FTE (FASB) float64 DRVF2012.Research expenses per FTE (FASB) float64 DRVF2012.Public service expenses per FTE (FASB) float64 DRVF2012.Academic support expenses per FTE (FASB) float64 DRVF2012.Student service expenses per FTE (FASB) float64 DRVF2012.Institutional support expenses per FTE (FASB) float64 DRVF2012.All other core expenses per FTE (FASB) float64 DRVF2012.Instruction expenses per FTE (for-profit institutions) float64 DRVF2012.Academic and institutional support, and student services expense per FTE (for-profit institutions) float64 DRVF2012.All other core expenses per FTE (for-profit institutions) float64 DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for other core expense functions as a percent of total expenses for other core expense functions (GASB) float64 DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (GASB) float64 DRVF2012.Total salaries and wage expenses as a percent of total expenses (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for other core expense functions as a percent of total expenses for other core expense functions (FASB) float64 DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (FASB) float64 DRVF2012.Total salaries and wage expenses as a percent of total expenses (FASB) float64 DRVF2012.Endowment assets (year end) per FTE enrollment (GASB) float64 DRVF2012.Endowment assets (year end) per FTE enrollment (FASB) float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - all ranks float64 DRVHR2012.Average salary equated to 9 months of full-time insructional staff - professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - associate professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - assistant professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - instructors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - lecturers float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - No academic rank float64 DRVHR2012.Total FTE staff int64 DRVHR2012.Postsecondary Teachers FTE staff int64 DRVHR2012.Postsecondary Teachers Instructional FTE int64 DRVHR2012.Postsecondary Teachers Research FTE int64 DRVHR2012.Postsecondary Teachers Public Service FTE int64 DRVHR2012.Librarians, Curators, and Archivists and other teaching and Instructional support occupations int64 DRVHR2012.Librarians, Curators, and Archivists FTE int64 DRVHR2012.Other teaching and Instructional Support FTE int64 DRVHR2012.Management FTE int64 DRVHR2012.Business and Financial Operations FTE int64 DRVHR2012.Computer, Engineering, and Science FTE int64 DRVHR2012.Community Service, Legal, Arts, and Media FTE int64 DRVHR2012.Healthcare FTE int64 DRVHR2012.Service, sales, office/admin support, natural resources, construction, maintenance, production, transportation & materials moving FTE int64 DRVHR2012.Service FTE int64 DRVHR2012.Sales and Related FTE int64 DRVHR2012.Office and Administrative Support FTE int64 DRVHR2012.Natural Resources, Construction, and Maintenance FTE int64 DRVHR2012.Production, Transportation, and Material Moving FTE int64 SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12.1 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11.1 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10.1 float64 dtypes: float64(130), int64(21), object(10)
ipeds[ipeds['institution name'].isin(data.college_name.unique())].info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1078 entries, 1 to 6453 Data columns (total 161 columns): unitid int64 institution name object year int64 DRVIC2012.Percent admitted - total float64 DRVIC2012.Admissions yield - total float64 DRVIC2012.Tuition and fees, 2009-10 float64 DRVIC2012.Tuition and fees, 2010-11 float64 DRVIC2012.Tuition and fees, 2011-12 float64 DRVIC2012.Tuition and fees, 2012-13 float64 HD2012.Institution size category object HD2012.Geographic region object HD2012.Sector of institution object HD2012.Level of institution object HD2012.Control of institution object HD2012.Degree-granting status object HD2012.Historically Black College or University object HD2012.Degree of urbanization (Urban-centric locale) object HD2012.Institutional category object DRVEF2012.Undergraduate enrollment float64 DRVEF2012.Graduate enrollment float64 DRVEF2012.Full-time undergraduate enrollment float64 DRVEF2012.Part-time undergraduate enrollment float64 DRVEF2012.Percent of total enrollment that are American Indian or Alaska Native float64 DRVEF2012.Percent of total enrollment that are Asian float64 DRVEF2012.Percent of total enrollment that are Black or African American float64 DRVEF2012.Percent of total enrollment that are Hispanic/Latino float64 DRVEF2012.Percent of total enrollment that are Native Hawaiian or Other Pacific Islander float64 DRVEF2012.Percent of total enrollment that are White float64 DRVEF2012.Percent of total enrollment that are two or more races float64 DRVEF2012.Percent of total enrollment that are Race/ethnicity unknown float64 DRVEF2012.Percent of total enrollment that are Nonresident Alien float64 DRVEF2012.Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander float64 DRVEF2012.Percent of total enrollment that are women float64 EF2012D.Student-to-faculty ratio float64 EF2012D.Full-time retention rate, 2012 float64 EF2012D.Part-time retention rate, 2012 float64 DRVEF2012.Percent of undergraduate enrollment 18-24 float64 DRVEF2012.Percent of undergraduate enrollment, 25-64 float64 DRVEF2012.Percent of undergraduate enrollment over 65 float64 DRVEFDE2012.Percent of students enrolled exclusively in distance education courses float64 DRVEFDE2012.Percent of students enrolled in some but not all distance education courses float64 DRVEFDE2012.Percent of students not enrolled in any distance education courses float64 DRVGR2012.Graduation rate, total cohort float64 DRVGR2012.Transfer-out rate, total cohort float64 GR200_12.Graduation rate - degree/certificate within 100% of normal time float64 DRVGR2012.Graduation rate - bachelor's degree within 4 years, total float64 SFA1112.Percent of full-time first-time undergraduates receiving any financial aid float64 SFA1112.Percent of full-time first-time undergraduates receiving federal, state, local or institutional grant aid float64 SFA1112.Average amount of federal, state, local or institutional grant aid received float64 DRVF2012.Core revenues, total dollars (GASB) float64 DRVF2012.Tuition and fees as a percent of core revenues (GASB) float64 DRVF2012.State appropriations as percent of core revenues (GASB) float64 DRVF2012.Local appropriations as a percent of core revenues (GASB) float64 DRVF2012.Government grants and contracts as a percent of core revenues (GASB) float64 DRVF2012.Private gifts, grants, and contracts as a percent of core revenues (GASB) float64 DRVF2012.Investment return as a percent of core revenues (GASB) float64 DRVF2012.Other revenues as a percent of core revenues (GASB) float64 DRVF2012.Core revenues, total dollars (FASB) float64 DRVF2012.Tuition and fees as a percent of core revenues (FASB) float64 DRVF2012.Government grants and contracts as a percent of core revenues (FASB) float64 DRVF2012.Private gifts, grants, contracts/contributions from affiliated entities as a percent of core revenues (FASB) float64 DRVF2012.Investment return as a percent of core revenues (FASB) float64 DRVF2012.Other revenues as a percent of core revenues (FASB) float64 DRVF2012.Core revenues, total dollars (for-profit institutions) float64 DRVF2012.Tuition and fees as a percent of core revenues (for-profit institutions) float64 DRVF2012.Govenment appropriations, grants, and contracts as a percent of core revenues (for-profit institutions) float64 DRVF2012.Sales and services of educational activities as a percent of core revenues (for-profit institutions) float64 DRVF2012.Other revenues as a percent of core revenues (for-profit institutions) float64 DRVF2012.Revenues from tuition and fees per FTE (GASB) float64 DRVF2012.Revenues from tuition and fees per FTE (FASB) float64 DRVF2012.Core expenses, total dollars (GASB) float64 DRVF2012.Instruction expenses as a percent of total core expenses (GASB) float64 DRVF2012.Research expenses as a percent of total core expenses (GASB) float64 DRVF2012.Public service expenses as a percent of total core expenses (GASB) float64 DRVF2012.Academic support expenses as a percent of total core expenses (GASB) float64 DRVF2012.Student service expenses as a percent of total core expenses (GASB) float64 DRVF2012.Institutional support expenses as a percent of total core expenses (GASB) float64 DRVF2012.Other core expenses as a percent of total core expenses (GASB) float64 DRVF2012.Core expenses, total dollars (FASB) float64 DRVF2012.Instruction expenses as a percent of total core expenses (FASB) float64 DRVF2012.Research expenses as a percent of total core expenses (FASB) float64 DRVF2012.Public service expenses as a percent of total core expenses (FASB) float64 DRVF2012.Academic support expenses as a percent of total core expenses (FASB) float64 DRVF2012.Student service expenses as a percent of total core expenses (FASB) float64 DRVF2012.Institutional support expenses as a percent of total core expenses (FASB) float64 DRVF2012.Other core expenses as a percent of total core expenses (FASB) float64 DRVF2012.Core expenses, total dollars (for-profit institutons) float64 DRVF2012.Instruction expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Academic and institutional support, and student service expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Other core expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Instruction expenses per FTE (GASB) float64 DRVF2012.Research expenses per FTE (GASB) float64 DRVF2012.Public service expenses per FTE (GASB) float64 DRVF2012.Academic support expenses per FTE (GASB) float64 DRVF2012.Student service expenses per FTE (GASB) float64 DRVF2012.Institutional support expenses per FTE (GASB) float64 DRVF2012.All other core expenses per FTE (GASB) float64 DRVF2012.Instruction expenses per FTE (FASB) float64 DRVF2012.Research expenses per FTE (FASB) float64 DRVF2012.Public service expenses per FTE (FASB) float64 DRVF2012.Academic support expenses per FTE (FASB) float64 DRVF2012.Student service expenses per FTE (FASB) float64 DRVF2012.Institutional support expenses per FTE (FASB) float64 DRVF2012.All other core expenses per FTE (FASB) float64 DRVF2012.Instruction expenses per FTE (for-profit institutions) float64 DRVF2012.Academic and institutional support, and student services expense per FTE (for-profit institutions) float64 DRVF2012.All other core expenses per FTE (for-profit institutions) float64 DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for other core expense functions as a percent of total expenses for other core expense functions (GASB) float64 DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (GASB) float64 DRVF2012.Total salaries and wage expenses as a percent of total expenses (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for other core expense functions as a percent of total expenses for other core expense functions (FASB) float64 DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (FASB) float64 DRVF2012.Total salaries and wage expenses as a percent of total expenses (FASB) float64 DRVF2012.Endowment assets (year end) per FTE enrollment (GASB) float64 DRVF2012.Endowment assets (year end) per FTE enrollment (FASB) float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - all ranks float64 DRVHR2012.Average salary equated to 9 months of full-time insructional staff - professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - associate professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - assistant professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - instructors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - lecturers float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - No academic rank float64 DRVHR2012.Total FTE staff int64 DRVHR2012.Postsecondary Teachers FTE staff int64 DRVHR2012.Postsecondary Teachers Instructional FTE int64 DRVHR2012.Postsecondary Teachers Research FTE int64 DRVHR2012.Postsecondary Teachers Public Service FTE int64 DRVHR2012.Librarians, Curators, and Archivists and other teaching and Instructional support occupations int64 DRVHR2012.Librarians, Curators, and Archivists FTE int64 DRVHR2012.Other teaching and Instructional Support FTE int64 DRVHR2012.Management FTE int64 DRVHR2012.Business and Financial Operations FTE int64 DRVHR2012.Computer, Engineering, and Science FTE int64 DRVHR2012.Community Service, Legal, Arts, and Media FTE int64 DRVHR2012.Healthcare FTE int64 DRVHR2012.Service, sales, office/admin support, natural resources, construction, maintenance, production, transportation & materials moving FTE int64 DRVHR2012.Service FTE int64 DRVHR2012.Sales and Related FTE int64 DRVHR2012.Office and Administrative Support FTE int64 DRVHR2012.Natural Resources, Construction, and Maintenance FTE int64 DRVHR2012.Production, Transportation, and Material Moving FTE int64 SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12.1 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11.1 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10.1 float64 dtypes: float64(130), int64(21), object(10)
pd.merge(data[['college_name']], ipeds, left_index=True, right_on='institution name').info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 0 entries Data columns (total 162 columns): college_name object unitid int64 institution name object year int64 DRVIC2012.Percent admitted - total float64 DRVIC2012.Admissions yield - total float64 DRVIC2012.Tuition and fees, 2009-10 float64 DRVIC2012.Tuition and fees, 2010-11 float64 DRVIC2012.Tuition and fees, 2011-12 float64 DRVIC2012.Tuition and fees, 2012-13 float64 HD2012.Institution size category object HD2012.Geographic region object HD2012.Sector of institution object HD2012.Level of institution object HD2012.Control of institution object HD2012.Degree-granting status object HD2012.Historically Black College or University object HD2012.Degree of urbanization (Urban-centric locale) object HD2012.Institutional category object DRVEF2012.Undergraduate enrollment float64 DRVEF2012.Graduate enrollment float64 DRVEF2012.Full-time undergraduate enrollment float64 DRVEF2012.Part-time undergraduate enrollment float64 DRVEF2012.Percent of total enrollment that are American Indian or Alaska Native float64 DRVEF2012.Percent of total enrollment that are Asian float64 DRVEF2012.Percent of total enrollment that are Black or African American float64 DRVEF2012.Percent of total enrollment that are Hispanic/Latino float64 DRVEF2012.Percent of total enrollment that are Native Hawaiian or Other Pacific Islander float64 DRVEF2012.Percent of total enrollment that are White float64 DRVEF2012.Percent of total enrollment that are two or more races float64 DRVEF2012.Percent of total enrollment that are Race/ethnicity unknown float64 DRVEF2012.Percent of total enrollment that are Nonresident Alien float64 DRVEF2012.Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander float64 DRVEF2012.Percent of total enrollment that are women float64 EF2012D.Student-to-faculty ratio float64 EF2012D.Full-time retention rate, 2012 float64 EF2012D.Part-time retention rate, 2012 float64 DRVEF2012.Percent of undergraduate enrollment 18-24 float64 DRVEF2012.Percent of undergraduate enrollment, 25-64 float64 DRVEF2012.Percent of undergraduate enrollment over 65 float64 DRVEFDE2012.Percent of students enrolled exclusively in distance education courses float64 DRVEFDE2012.Percent of students enrolled in some but not all distance education courses float64 DRVEFDE2012.Percent of students not enrolled in any distance education courses float64 DRVGR2012.Graduation rate, total cohort float64 DRVGR2012.Transfer-out rate, total cohort float64 GR200_12.Graduation rate - degree/certificate within 100% of normal time float64 DRVGR2012.Graduation rate - bachelor's degree within 4 years, total float64 SFA1112.Percent of full-time first-time undergraduates receiving any financial aid float64 SFA1112.Percent of full-time first-time undergraduates receiving federal, state, local or institutional grant aid float64 SFA1112.Average amount of federal, state, local or institutional grant aid received float64 DRVF2012.Core revenues, total dollars (GASB) float64 DRVF2012.Tuition and fees as a percent of core revenues (GASB) float64 DRVF2012.State appropriations as percent of core revenues (GASB) float64 DRVF2012.Local appropriations as a percent of core revenues (GASB) float64 DRVF2012.Government grants and contracts as a percent of core revenues (GASB) float64 DRVF2012.Private gifts, grants, and contracts as a percent of core revenues (GASB) float64 DRVF2012.Investment return as a percent of core revenues (GASB) float64 DRVF2012.Other revenues as a percent of core revenues (GASB) float64 DRVF2012.Core revenues, total dollars (FASB) float64 DRVF2012.Tuition and fees as a percent of core revenues (FASB) float64 DRVF2012.Government grants and contracts as a percent of core revenues (FASB) float64 DRVF2012.Private gifts, grants, contracts/contributions from affiliated entities as a percent of core revenues (FASB) float64 DRVF2012.Investment return as a percent of core revenues (FASB) float64 DRVF2012.Other revenues as a percent of core revenues (FASB) float64 DRVF2012.Core revenues, total dollars (for-profit institutions) float64 DRVF2012.Tuition and fees as a percent of core revenues (for-profit institutions) float64 DRVF2012.Govenment appropriations, grants, and contracts as a percent of core revenues (for-profit institutions) float64 DRVF2012.Sales and services of educational activities as a percent of core revenues (for-profit institutions) float64 DRVF2012.Other revenues as a percent of core revenues (for-profit institutions) float64 DRVF2012.Revenues from tuition and fees per FTE (GASB) float64 DRVF2012.Revenues from tuition and fees per FTE (FASB) float64 DRVF2012.Core expenses, total dollars (GASB) float64 DRVF2012.Instruction expenses as a percent of total core expenses (GASB) float64 DRVF2012.Research expenses as a percent of total core expenses (GASB) float64 DRVF2012.Public service expenses as a percent of total core expenses (GASB) float64 DRVF2012.Academic support expenses as a percent of total core expenses (GASB) float64 DRVF2012.Student service expenses as a percent of total core expenses (GASB) float64 DRVF2012.Institutional support expenses as a percent of total core expenses (GASB) float64 DRVF2012.Other core expenses as a percent of total core expenses (GASB) float64 DRVF2012.Core expenses, total dollars (FASB) float64 DRVF2012.Instruction expenses as a percent of total core expenses (FASB) float64 DRVF2012.Research expenses as a percent of total core expenses (FASB) float64 DRVF2012.Public service expenses as a percent of total core expenses (FASB) float64 DRVF2012.Academic support expenses as a percent of total core expenses (FASB) float64 DRVF2012.Student service expenses as a percent of total core expenses (FASB) float64 DRVF2012.Institutional support expenses as a percent of total core expenses (FASB) float64 DRVF2012.Other core expenses as a percent of total core expenses (FASB) float64 DRVF2012.Core expenses, total dollars (for-profit institutons) float64 DRVF2012.Instruction expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Academic and institutional support, and student service expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Other core expenses as a percent of total core expenses (for-profit institutions) float64 DRVF2012.Instruction expenses per FTE (GASB) float64 DRVF2012.Research expenses per FTE (GASB) float64 DRVF2012.Public service expenses per FTE (GASB) float64 DRVF2012.Academic support expenses per FTE (GASB) float64 DRVF2012.Student service expenses per FTE (GASB) float64 DRVF2012.Institutional support expenses per FTE (GASB) float64 DRVF2012.All other core expenses per FTE (GASB) float64 DRVF2012.Instruction expenses per FTE (FASB) float64 DRVF2012.Research expenses per FTE (FASB) float64 DRVF2012.Public service expenses per FTE (FASB) float64 DRVF2012.Academic support expenses per FTE (FASB) float64 DRVF2012.Student service expenses per FTE (FASB) float64 DRVF2012.Institutional support expenses per FTE (FASB) float64 DRVF2012.All other core expenses per FTE (FASB) float64 DRVF2012.Instruction expenses per FTE (for-profit institutions) float64 DRVF2012.Academic and institutional support, and student services expense per FTE (for-profit institutions) float64 DRVF2012.All other core expenses per FTE (for-profit institutions) float64 DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for other core expense functions as a percent of total expenses for other core expense functions (GASB) float64 DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (GASB) float64 DRVF2012.Total salaries and wage expenses as a percent of total expenses (GASB) float64 DRVF2012.Salaries, wages, and benefit expenses for core expenses as a percent of total core expenses (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for instruction as a percent of total expenses for instruction (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for research as a percent of total expenses for research (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for public service as a percent of total expenses for public service (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for academic support as a percent of total expenses for academic support (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for student services as a percent of total expenses for student services (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for institutional support as a percent of total expenses for institutional support (FASB) float64 DRVF2012.Salaries, wages, and benefit expenses for other core expense functions as a percent of total expenses for other core expense functions (FASB) float64 DRVF2012.Total salaries, wages, and benefit expenses as a percent of total expenses (FASB) float64 DRVF2012.Total salaries and wage expenses as a percent of total expenses (FASB) float64 DRVF2012.Endowment assets (year end) per FTE enrollment (GASB) float64 DRVF2012.Endowment assets (year end) per FTE enrollment (FASB) float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - all ranks float64 DRVHR2012.Average salary equated to 9 months of full-time insructional staff - professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - associate professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - assistant professors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - instructors float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - lecturers float64 DRVHR2012.Average salary equated to 9 months of full-time instructional staff - No academic rank float64 DRVHR2012.Total FTE staff int64 DRVHR2012.Postsecondary Teachers FTE staff int64 DRVHR2012.Postsecondary Teachers Instructional FTE int64 DRVHR2012.Postsecondary Teachers Research FTE int64 DRVHR2012.Postsecondary Teachers Public Service FTE int64 DRVHR2012.Librarians, Curators, and Archivists and other teaching and Instructional support occupations int64 DRVHR2012.Librarians, Curators, and Archivists FTE int64 DRVHR2012.Other teaching and Instructional Support FTE int64 DRVHR2012.Management FTE int64 DRVHR2012.Business and Financial Operations FTE int64 DRVHR2012.Computer, Engineering, and Science FTE int64 DRVHR2012.Community Service, Legal, Arts, and Media FTE int64 DRVHR2012.Healthcare FTE int64 DRVHR2012.Service, sales, office/admin support, natural resources, construction, maintenance, production, transportation & materials moving FTE int64 DRVHR2012.Service FTE int64 DRVHR2012.Sales and Related FTE int64 DRVHR2012.Office and Administrative Support FTE int64 DRVHR2012.Natural Resources, Construction, and Maintenance FTE int64 DRVHR2012.Production, Transportation, and Material Moving FTE int64 SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2011-12.1 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2010-11.1 float64 SFA1112.Average net price-students receiving grant or scholarship aid, 2009-10.1 float64 dtypes: float64(130), int64(21), object(11)
data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4182 entries, 1809 to 6927 Data columns (total 54 columns): department_id 4182 non-null int64 payaverage 4182 non-null int64 paytype 662 non-null float64 user_supplied_department_name 4177 non-null object course_type 0 non-null float64 contract_type 0 non-null object period_term 4121 non-null float64 period_year 4121 non-null float64 has_contract 0 non-null float64 has_retirement 3878 non-null float64 has_health_insurance 3914 non-null float64 has_governance 3897 non-null float64 has_union 3533 non-null float64 union_name 3515 non-null object is_school_rep 4182 non-null int64 course_syllabus 4083 non-null float64 office_space 4003 non-null float64 student_evals 3483 non-null float64 has_advanced_degree 4182 non-null object paytype_credits 486 non-null float64 paytype_class_size 18 non-null float64 paytype_hours 168 non-null float64 paytype_weeks 168 non-null float64 paytype_courses 103 non-null float64 department_type_id 4182 non-null int64 display_name 4182 non-null object department_type_name 4182 non-null object college_class 4182 non-null object college_carnegie 4182 non-null object college_unit_id 4182 non-null int64 college_state 4182 non-null object college_id 4182 non-null int64 college_hbcu 0 non-null float64 college_flagship 0 non-null float64 college_name 4182 non-null object public 4182 non-null bool private 4182 non-null bool for-profit 4182 non-null bool private not-for-profit 4182 non-null bool division 4182 non-null object two_year 4182 non-null float64 degree_JD 4182 non-null float64 degree_MA/MS 4182 non-null float64 degree_MD 4182 non-null float64 degree_None 4182 non-null float64 degree_PhD 4182 non-null float64 degree_nan 4182 non-null float64 div_Arts/Hum 4182 non-null float64 div_Gen/LibA 4182 non-null float64 div_Other 4182 non-null float64 div_Prof/ApS 4182 non-null float64 div_Sci/Tech 4182 non-null float64 div_SocSci 4182 non-null float64 any_health_ins 3663 non-null float64 dtypes: bool(4), float64(33), int64(6), object(11)