When I started my exploration of The Relationship Between Child
Poverty and College and Career Readiness in Missouri School Districts,
I wanted to explore the data in two ways; through SQL queries and in
R. The report from my exploration in R can be found
here.
Whereas the report in R focuses on the story of the data, this focuses
on the queries themselves, this report documents the questions and the
SQL queries used to answer them in PostgresSQL. For the full story, as
well as more detailed information about understanding the data.
Note- In the report, I renamed the determinations for clarity.
These queries use the original determinations
Create Tables and import data in Database
After collecting the relevant data, I needed to create tables within
my Missouri Education database and import the data.
This one is long, Click to reveal query
-- Table: public.ccr
-- DROP TABLE IF EXISTS public.ccr;
CREATE TABLE IF NOT EXISTS public.ccr
(
year integer,
county_district_code integer,
district_name character varying COLLATE pg_catalog."default" NOT NULL,
is_k_12_district boolean,
is_k_8_only_district boolean,
beg_grade character varying COLLATE pg_catalog."default",
end_grade integer,
dist_exist_3_yrs boolean,
is_growing_a_high_school boolean,
assessment_curr_pct numeric,
assessment_pyr2_pct numeric,
assessment_pyr3_pct numeric,
assessment_status_3yr_pct numeric,
assessment_status_determination character varying COLLATE pg_catalog."default",
assessment_prior_2yr_avg numeric,
assessment_curr_2yr_pct_avg numeric,
assessment_progress_determination character varying COLLATE pg_catalog."default",
college_credit_curr_pct numeric,
college_credit_pyr2_pct numeric,
college_credit_pyr3_pct numeric,
college_credit_status_3yr_pct numeric,
college_credit_status_determination character varying COLLATE pg_catalog."default",
college_credit_prior_2yr_avg numeric,
college_credit_curr_2yr_pct_avg numeric,
college_credit_progress_determination character varying COLLATE pg_catalog."default",
secondary_placement_curr_pct numeric,
secondary_placement_pyr2_pct numeric,
secondary_placement_pyr3_pct numeric,
secondary_placement_status_3yr_pct numeric,
secondary_placement_status_determination character varying COLLATE pg_catalog."default",
secondary_placement_prior_2yr_avg numeric,
secondary_placement_curr_2_yr_pct_avg numeric,
secondary_placement_progress_determination character varying COLLATE pg_catalog."default",
CONSTRAINT ccr_pkey PRIMARY KEY (district_name)
)
TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.ccr
OWNER to postgres;
/*Import data*/
COPY ccr
FROM 'C:/Users...' --Truncated path
DELIMITER ','
CSV HEADER;
-- Table: public.poverty
-- DROP TABLE IF EXISTS public.poverty;
CREATE TABLE IF NOT EXISTS public.poverty
(
year integer,
id numeric NOT NULL,
district_name character varying COLLATE pg_catalog."default",
grades character varying COLLATE pg_catalog."default",
total_pop numeric,
rel_pop_ages_5_to_17 numeric,
rel_5_to_17_in_fam_in_pov numeric,
rel_5_to_17_pov_ratio numeric,
CONSTRAINT poverty_pkey PRIMARY KEY (id)
)
TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.poverty
OWNER to postgres;
/*Import data*/
COPY poverty
FROM 'C:/Users...' --Truncated path
DELIMITER ','
CSV HEADER;
-- Table: public.finance
-- DROP TABLE IF EXISTS public.finance;
CREATE TABLE IF NOT EXISTS public.finance
(
county_district_code numeric,
district_name character varying COLLATE pg_catalog."default",
year integer,
pk_enrollment integer,
enrollment numeric,
percent_local numeric,
percent_state numeric,
percent_federal numeric,
average_daily_attendance numeric,
total_expenditure numeric,
tax_rate_ceiling_operating_funds numeric,
adjusted_tax_rate_incidental numeric,
adjusted_tax_rate_teachers numeric,
adjusted_tax_rate_debt_service numeric,
adjusted_tax_rate_capital_projects numeric,
assessed_valuation numeric
)
TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.finance
OWNER to postgres;
/*Import data*/
COPY finance
FROM 'C:/Users...' --Truncated path
DELIMITER ','
CSV HEADER;
-- Table: public.local_tax
-- DROP TABLE IF EXISTS public.local_tax;
CREATE TABLE IF NOT EXISTS public.local_tax
(
district_code numeric,
district_name character varying COLLATE pg_catalog."default",
total_local_taxes numeric,
ada numeric,
local_tax_effort_per_ada numeric
)
TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.local_tax
OWNER to postgres;
/*Import data*/
COPY local_tax
FROM 'C:/Users...' --Truncated path
DELIMITER ','
CSV HEADER;
Questions
This is where the fun begins.
/* What is the Min, Max, Mean, and Median child poverty ratio (cpr)?*/
SELECT MIN(child_poverty_ratio) AS min_child_poverty_ratio
, MAX(child_poverty_ratio) AS max_cpr
, ROUND(AVG(child_poverty_ratio), 2) AS mean_cpr
, PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cpr
FROM combined;
1 records
| 2.5 |
51.8 |
18.78 |
17.9 |
/* What district has the lowest cpr? */
SELECT district_name
, child_poverty_ratio
FROM combined
WHERE child_poverty_ratio = (
SELECT MIN(child_poverty_ratio)
FROM combined
);
1 records
| KIRKWOOD R-VII |
2.5 |
/* What district has the highest cpr? */
SELECT district_name
, child_poverty_ratio
FROM combined
WHERE child_poverty_ratio = (
SELECT MAX(child_poverty_ratio)
FROM combined
);
1 records
| SEYMOUR R-II |
51.8 |
/* What district has the median cpr? */
SELECT district_name
, child_poverty_ratio
FROM combined
WHERE child_poverty_ratio = (
SELECT PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY child_poverty_ratio)
FROM combined
);
4 records
| BRADLEYVILLE R-I |
17.9 |
| CARL JUNCTION R-I |
17.9 |
| HANNIBAL 60 |
17.9 |
| UNION STAR R-II |
17.9 |
College Proficiency Category
/* How many districts are there in each college credit determination?*/
SELECT college_credit_determ
, COUNT(college_credit_determ) AS college_credit_totals
FROM combined
GROUP BY college_credit_determ
ORDER BY CASE
WHEN college_credit_determ = 'Target' THEN 1
WHEN college_credit_determ = 'Approaching' THEN 2
WHEN college_credit_determ = 'On Track' THEN 3
WHEN college_credit_determ = 'Floor' THEN 4 END;;
4 records
| Target |
310 |
| Approaching |
105 |
| On Track |
30 |
| Floor |
1 |
/* What district has the highest college credit percentage and what is their child poverty ratio?*/
SELECT district_name
, college_credit_pct
, college_credit_determ
, child_poverty_ratio
FROM combined
WHERE college_credit_pct = (
SELECT MAX(college_credit_pct)
FROM combined
);
1 records
| KIRKWOOD R-VII |
103.1 |
Target |
2.5 |
/* What district has the lowest college credit percentage and what is their child poverty ratio?*/
SELECT district_name
, college_credit_pct
, college_credit_determ
, child_poverty_ratio
FROM combined
WHERE college_credit_pct = (
SELECT MIN(college_credit_pct)
FROM combined
);
1 records
| OSBORN R-O |
3.7 |
Floor |
15.2 |
/* What district has the Median college credit percentage and what is their child poverty ratio?*/
SELECT district_name
, college_credit_pct
, college_credit_determ
, child_poverty_ratio
FROM combined
WHERE college_credit_pct = (SELECT PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY college_credit_pct)
FROM combined
);
2 records
| PALMYRA R-I |
56.7 |
Target |
10.8 |
| ST. JOSEPH |
56.7 |
Target |
17.6 |
/* What is the median, min, max, and mean poverty ratio in each college credit determination?*/
SELECT college_credit_determ
, PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cp_ratio
, MIN(child_poverty_ratio) AS min_cpr
, MAX(child_poverty_ratio) AS max_cpr
, ROUND(AVG(child_poverty_ratio), 2) AS avg_cpr
FROM combined
GROUP BY college_credit_determ
ORDER BY CASE
WHEN college_credit_determ = 'Target' THEN 1
WHEN college_credit_determ = 'Approaching' THEN 2
WHEN college_credit_determ = 'On Track' THEN 3
WHEN college_credit_determ = 'Floor' THEN 4 END;
4 records
| Target |
17.2 |
2.5 |
51.8 |
17.78 |
| Approaching |
20.3 |
7.1 |
44.2 |
22.03 |
| On Track |
16.3 |
7.1 |
29.0 |
17.82 |
| Floor |
15.2 |
15.2 |
15.2 |
15.20 |
/* How many districts are in each college credit determination when poverty ratio is <= 7%*/
SELECT college_credit_determ
, COUNT(college_credit_determ) AS college_credit_totals
FROM combined
WHERE child_poverty_ratio <= 7
GROUP BY college_credit_determ
ORDER BY CASE
WHEN college_credit_determ = 'Target' THEN 1
WHEN college_credit_determ = 'Approaching' THEN 2
WHEN college_credit_determ = 'On Track' THEN 3
WHEN college_credit_determ = 'Floor' THEN 4 END;
/* At what child poverty level, if any, do more districts not meet the target for college credit than do? Round poverty ratio to nearest whole number. (Spoiler because you can't see the whole list: There are never more districts that don't meet the target than do)*/
WITH test AS (
SELECT college_credit_determ
, ROUND(child_poverty_ratio, 0) AS cp_ratio
, (CASE
WHEN college_credit_determ = 'Target' THEN 1
ELSE 0
END) target
, (CASE
WHEN college_credit_determ = 'Target' THEN 0
ELSE 1
END) not_target
FROM combined
)
SELECT college_credit_determ
, cp_ratio
, SUM(target) OVER (
--PARTITION BY cp_ratio
ORDER BY cp_ratio
) AS cumulative_target
, SUM(not_target) OVER (
--PARTITION BY cp_ratio
ORDER BY cp_ratio
) AS cumulative_not_target
FROM test
ORDER BY cp_ratio, college_credit_determ ;
Displaying records 1 - 10
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
4 |
13 |
0 |
| Target |
4 |
13 |
0 |
| Target |
4 |
13 |
0 |
| Target |
4 |
13 |
0 |
| Target |
4 |
13 |
0 |
Post Secondary Placement Category
/*Number of districts in each secondary placement determination*/
SELECT secondary_placement_determ
, COUNT(secondary_placement_determ) AS secondary_placement_totals
FROM combined
GROUP BY secondary_placement_determ
ORDER BY CASE
WHEN secondary_placement_determ = 'Target' THEN 1
WHEN secondary_placement_determ = 'Approaching' THEN 2
WHEN secondary_placement_determ = 'On Track' THEN 3
WHEN secondary_placement_determ = 'Floor' THEN 4 END;
4 records
| Target |
285 |
| Approaching |
24 |
| On Track |
130 |
| Floor |
7 |
/* What district has the lowest secondary placement percentage and what is their child poverty ratio?*/
SELECT district_name
, secondary_placement_pct
, secondary_placement_determ
, child_poverty_ratio
FROM combined
WHERE secondary_placement_pct = (
SELECT MIN(secondary_placement_pct)
FROM combined
);
1 records
| COOPER CO. R-IV |
58.9 |
Floor |
15.3 |
/* What district has the highest secondary placement percentage and what is their child poverty ratio?*/
SELECT district_name
, secondary_placement_pct
, secondary_placement_determ
, child_poverty_ratio
FROM combined
WHERE secondary_placement_pct = (
SELECT MAX(secondary_placement_pct)
FROM combined
);
Displaying records 1 - 10
| ATLANTA C-3 |
100 |
Target |
11.5 |
| CANTON R-V |
100 |
Target |
19.9 |
| CLARK CO. R-I |
100 |
Target |
20.3 |
| COLE CO. R-I |
100 |
Target |
6.3 |
| FAIRFAX R-III |
100 |
Target |
18.9 |
| IRON CO. C-4 |
100 |
Target |
19.6 |
| MACON CO. R-IV |
100 |
Target |
23.5 |
| MONTROSE R-XIV |
100 |
Target |
27.3 |
| NEWTOWN-HARRIS R-III |
100 |
Target |
25.8 |
| NORTHEAST NODAWAY CO. R-V |
100 |
Target |
12.6 |
/* What is the median, min, max, and mean poverty ratio in each secondary placement determination?*/
SELECT secondary_placement_determ
, PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cp_ratio
, MIN(child_poverty_ratio)
, MAX(child_poverty_ratio)
, ROUND(AVG(child_poverty_ratio), 2)
FROM combined
GROUP BY secondary_placement_determ
ORDER BY CASE
WHEN secondary_placement_determ = 'Target' THEN 1
WHEN secondary_placement_determ = 'Approaching' THEN 2
WHEN secondary_placement_determ = 'On Track' THEN 3
WHEN secondary_placement_determ = 'Floor' THEN 4 END;
4 records
| Target |
16.4 |
2.5 |
44.2 |
17.76 |
| Approaching |
17.3 |
11.4 |
37.5 |
19.84 |
| On Track |
19.5 |
4.3 |
51.8 |
20.49 |
| Floor |
19.6 |
12.3 |
39.2 |
24.70 |
/* How many districts are in each secondary placement determination when poverty ratio is <= 7%*/
SELECT secondary_placement_determ
, COUNT(secondary_placement_determ) AS secondary_placement_totals
FROM combined
WHERE child_poverty_ratio <= 7
GROUP BY secondary_placement_determ
ORDER BY CASE
WHEN secondary_placement_determ = 'Target' THEN 1
WHEN secondary_placement_determ = 'Approaching' THEN 2
WHEN secondary_placement_determ = 'On Track' THEN 3
WHEN secondary_placement_determ = 'Floor' THEN 4 END;
2 records
| Target |
24 |
| On Track |
2 |
/* At what child poverty level, if any, do more districts not meet the target for secondary placement than do? Round poverty ratio to nearest whole number. (Spoiler because you can't see the whole list: There are never more districts that don't meet the target than do)*/
WITH test AS (
SELECT secondary_placement_determ
, ROUND(child_poverty_ratio, 0) AS cp_ratio
, (CASE
WHEN secondary_placement_determ = 'Target' THEN 1
ELSE 0
END) target
, (CASE
WHEN secondary_placement_determ = 'Target' THEN 0
ELSE 1
END) not_target
FROM combined
)
SELECT secondary_placement_determ
, cp_ratio
, SUM(target) OVER (
ORDER BY cp_ratio
) AS cumulative_target
, SUM(not_target) OVER (
ORDER BY cp_ratio
) AS cumulative_not_target
FROM test
ORDER BY cp_ratio, secondary_placement_determ ;
Displaying records 1 - 10
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| Target |
3 |
5 |
0 |
| On Track |
4 |
12 |
1 |
| Target |
4 |
12 |
1 |
| Target |
4 |
12 |
1 |
| Target |
4 |
12 |
1 |
| Target |
4 |
12 |
1 |
Assessment Category
/* How many districts are in each assessment determination?*/
SELECT assessment_determ
, COUNT(assessment_determ) AS assessment_totals
FROM combined
GROUP BY assessment_determ
ORDER BY CASE
WHEN assessment_determ = 'Target' THEN 1
WHEN assessment_determ = 'Approaching' THEN 2
WHEN assessment_determ = 'On Track' THEN 3
WHEN assessment_determ = 'Floor' THEN 4 END;
4 records
| Target |
189 |
| Approaching |
150 |
| On Track |
104 |
| Floor |
3 |
/* What district has the lowest assessment percentage and what is their child poverty ratio?*/
SELECT district_name
, assessment_pct
, assessment_determ
, child_poverty_ratio
FROM combined
WHERE assessment_pct = (
SELECT MIN(assessment_pct)
FROM combined
);
1 records
| RIVERVIEW GARDENS |
29.9 |
Floor |
28.4 |
/* What district has the highest assessment percentage and what is their child poverty ratio?*/
SELECT district_name
, assessment_pct
, assessment_determ
, child_poverty_ratio
FROM combined
WHERE assessment_pct = (
SELECT MAX(assessment_pct)
FROM combined
);
1 records
| LADUE |
100.3 |
Target |
3.1 |
/* What district has the Median assessment percentage and what is their child poverty ratio?*/
SELECT district_name
, assessment_pct
, assessment_determ
, child_poverty_ratio
FROM combined
WHERE assessment_pct = (SELECT PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY assessment_pct)
FROM combined
);
2 records
| NORTH ST. FRANCOIS CO. R-I |
70 |
On Track |
17.8 |
| RICHLAND R-I |
70 |
On Track |
23.9 |
/* What is the median, min, max poverty ratio in each assessment determination?*/
SELECT assessment_determ
, PERCENTILE_DISC(0.5)
WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cp_ratio
, MIN(child_poverty_ratio)
, MAX(child_poverty_ratio)
FROM combined
GROUP BY assessment_determ
ORDER BY CASE
WHEN assessment_determ = 'Target' THEN 1
WHEN assessment_determ = 'Approaching' THEN 2
WHEN assessment_determ = 'On Track' THEN 3
WHEN assessment_determ = 'Floor' THEN 4 END;
4 records
| Target |
14.0 |
2.5 |
37.7 |
| Approaching |
21.9 |
7.1 |
51.8 |
| On Track |
18.2 |
7.6 |
37.0 |
| Floor |
31.2 |
28.4 |
43.8 |
/* How many districts are in each assessment determination when poverty ratio is <= 7%*/
SELECT assessment_determ
, COUNT(assessment_determ) AS assessment_totals
FROM combined
WHERE child_poverty_ratio <= 7
GROUP BY assessment_determ
ORDER BY CASE
WHEN assessment_determ = 'Target' THEN 1
WHEN assessment_determ = 'Approaching' THEN 2
WHEN assessment_determ = 'On Track' THEN 3
WHEN assessment_determ = 'Floor' THEN 4 END;
/* At what child poverty level, if any, do more districts not meet the target for assessment than do? Round poverty ratio to nearest whole number.*/
WITH test AS (
SELECT assessment_determ
, ROUND(child_poverty_ratio, 0) AS cp_ratio
, (CASE
WHEN assessment_determ = 'Target' THEN 1
ELSE 0
END) target
, (CASE
WHEN assessment_determ = 'Target' THEN 0
ELSE 1
END) not_target
FROM combined
)
SELECT assessment_determ
, cp_ratio
, target
, not_target
, SUM(target) OVER (
--PARTITION BY cp_ratio
ORDER BY cp_ratio
) AS cumulative_target
, SUM(not_target) OVER (
--PARTITION BY cp_ratio
ORDER BY cp_ratio
) AS cumulative_not_target
FROM test
ORDER BY cp_ratio, assessment_determ ;
Displaying records 1 - 10
| Target |
3 |
1 |
0 |
5 |
0 |
| Target |
3 |
1 |
0 |
5 |
0 |
| Target |
3 |
1 |
0 |
5 |
0 |
| Target |
3 |
1 |
0 |
5 |
0 |
| Target |
3 |
1 |
0 |
5 |
0 |
| Target |
4 |
1 |
0 |
13 |
0 |
| Target |
4 |
1 |
0 |
13 |
0 |
| Target |
4 |
1 |
0 |
13 |
0 |
| Target |
4 |
1 |
0 |
13 |
0 |
| Target |
4 |
1 |
0 |
13 |
0 |
/* A child poverty ratio of 24 is the tipping point at which more districts do not meet the target assessment score than do. With that in mind, what percentage of districts have a poverty ratio above 24%? Round poverty ratio to nearest whole number.*/
SELECT SUM(CASE
WHEN child_poverty_ratio > 24 THEN 1 ELSE 0 END) AS number_above_24
, ROUND(SUM(CASE
WHEN child_poverty_ratio > 24 THEN 1 ELSE 0 END)::numeric/COUNT(child_poverty_ratio)*100,0) AS pct_above_24
FROM combined;