read
SQL Queries, The Relationship Between Child Poverty and College and Career Readiness in Missouri School Districts

Purpose

When I started my exploration of The Relationship Between Child Poverty and College and Career Readiness in Missouri School Districts, I wanted to explore the data in two ways; through SQL queries and in R. The report from my exploration in R can be found here. Whereas the report in R focuses on the story of the data, this focuses on the queries themselves, this report documents the questions and the SQL queries used to answer them in PostgresSQL. For the full story, as well as more detailed information about understanding the data.
Note- In the report, I renamed the determinations for clarity. These queries use the original determinations

Create Tables and import data in Database

After collecting the relevant data, I needed to create tables within my Missouri Education database and import the data.

This one is long, Click to reveal query

-- Table: public.ccr

-- DROP TABLE IF EXISTS public.ccr;

CREATE TABLE IF NOT EXISTS public.ccr
(
    year integer,
    county_district_code integer,
    district_name character varying COLLATE pg_catalog."default" NOT NULL,
    is_k_12_district boolean,
    is_k_8_only_district boolean,
    beg_grade character varying COLLATE pg_catalog."default",
    end_grade integer,
    dist_exist_3_yrs boolean,
    is_growing_a_high_school boolean,
    assessment_curr_pct numeric,
    assessment_pyr2_pct numeric,
    assessment_pyr3_pct numeric,
    assessment_status_3yr_pct numeric,
    assessment_status_determination character varying COLLATE pg_catalog."default",
    assessment_prior_2yr_avg numeric,
    assessment_curr_2yr_pct_avg numeric,
    assessment_progress_determination character varying COLLATE pg_catalog."default",
    college_credit_curr_pct numeric,
    college_credit_pyr2_pct numeric,
    college_credit_pyr3_pct numeric,
    college_credit_status_3yr_pct numeric,
    college_credit_status_determination character varying COLLATE pg_catalog."default",
    college_credit_prior_2yr_avg numeric,
    college_credit_curr_2yr_pct_avg numeric,
    college_credit_progress_determination character varying COLLATE pg_catalog."default",
    secondary_placement_curr_pct numeric,
    secondary_placement_pyr2_pct numeric,
    secondary_placement_pyr3_pct numeric,
    secondary_placement_status_3yr_pct numeric,
    secondary_placement_status_determination character varying COLLATE pg_catalog."default",
    secondary_placement_prior_2yr_avg numeric,
    secondary_placement_curr_2_yr_pct_avg numeric,
    secondary_placement_progress_determination character varying COLLATE pg_catalog."default",
    CONSTRAINT ccr_pkey PRIMARY KEY (district_name)
)

TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.ccr
    OWNER to postgres;

/*Import data*/
COPY ccr
FROM 'C:/Users...' --Truncated path
DELIMITER ',' 
CSV HEADER;

-- Table: public.poverty

-- DROP TABLE IF EXISTS public.poverty;

CREATE TABLE IF NOT EXISTS public.poverty
(
    year integer,
    id numeric NOT NULL,
    district_name character varying COLLATE pg_catalog."default",
    grades character varying COLLATE pg_catalog."default",
    total_pop numeric,
    rel_pop_ages_5_to_17 numeric,
    rel_5_to_17_in_fam_in_pov numeric,
    rel_5_to_17_pov_ratio numeric,
    CONSTRAINT poverty_pkey PRIMARY KEY (id)
)

TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.poverty
    OWNER to postgres;

/*Import data*/
COPY poverty
FROM 'C:/Users...' --Truncated path
DELIMITER ',' 
CSV HEADER;

-- Table: public.finance

-- DROP TABLE IF EXISTS public.finance;

CREATE TABLE IF NOT EXISTS public.finance
(
    county_district_code numeric,
    district_name character varying COLLATE pg_catalog."default",
    year integer,
    pk_enrollment integer,
    enrollment numeric,
    percent_local numeric,
    percent_state numeric,
    percent_federal numeric,
    average_daily_attendance numeric,
    total_expenditure numeric,
    tax_rate_ceiling_operating_funds numeric,
    adjusted_tax_rate_incidental numeric,
    adjusted_tax_rate_teachers numeric,
    adjusted_tax_rate_debt_service numeric,
    adjusted_tax_rate_capital_projects numeric,
    assessed_valuation numeric
)

TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.finance
    OWNER to postgres;

/*Import data*/
COPY finance
FROM 'C:/Users...' --Truncated path
DELIMITER ',' 
CSV HEADER;

-- Table: public.local_tax

-- DROP TABLE IF EXISTS public.local_tax;

CREATE TABLE IF NOT EXISTS public.local_tax
(
    district_code numeric,
    district_name character varying COLLATE pg_catalog."default",
    total_local_taxes numeric,
    ada numeric,
    local_tax_effort_per_ada numeric
)

TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.local_tax
    OWNER to postgres;

/*Import data*/
COPY local_tax
FROM 'C:/Users...' --Truncated path
DELIMITER ',' 
CSV HEADER;

Combine Tables

Because the tables are part of my own database and there would be no new data, I decided I would save some time by combining the data I thought I might use into one table. This way I wouldn’t have to use join statements for every query. If there was a possibility of data being added, I would have left the tables as is in the database and queried them using join statements.

Another long one, Click to reveal query

/*Combine relevant data from all tables into one table.*/
CREATE TABLE combined AS
    SELECT ccr.year
        , ccr.district_name
        , finance.pk_enrollment
        , finance.enrollment
        , finance.average_daily_attendance
        , ROUND(finance.average_daily_attendance*100.00/finance.enrollment,2) AS percent_ada
        , ccr.assessment_status_3yr_pct AS assessment_pct
        , ccr.assessment_status_determination AS assessment_determ
        , ccr.college_credit_status_3yr_pct AS college_credit_pct
        , ccr.college_credit_status_determination AS college_credit_determ
        , ccr.secondary_placement_status_3yr_pct AS secondary_placement_pct
        , ccr.secondary_placement_status_determination AS secondary_placement_determ
        , finance.total_expenditure
        , finance.percent_local AS pct_funding_local
        , finance.percent_state AS pct_funding_state
        , finance.percent_federal AS pct_funding_federal
        , finance.assessed_valuation
        , finance.tax_rate_ceiling_operating_funds AS tax_rate_ceiling
        , ROUND((finance.tax_rate_ceiling_operating_funds*finance.assessed_valuation)/100) AS calculated_local_tax
        , local_tax.total_local_taxes AS reported_local_tax
        , poverty.rel_5_to_17_in_fam_in_pov AS children_in_poverty
        , poverty.rel_5_to_17_pov_ratio AS child_poverty_ratio
    FROM ccr
        LEFT JOIN finance
            ON ccr.district_name = finance.district_name
        LEFT JOIN local_tax
            on ccr.district_name = local_tax.district_name
        LEFT JOIN poverty
            ON ccr.district_name = poverty.district_name
    WHERE poverty.rel_5_to_17_pov_ratio IS NOT null
    ORDER BY ccr.district_name;
    

Questions

This is where the fun begins.


/* What is the Min, Max, Mean, and Median child poverty ratio (cpr)?*/

SELECT MIN(child_poverty_ratio) AS min_child_poverty_ratio
  , MAX(child_poverty_ratio) AS max_cpr
  , ROUND(AVG(child_poverty_ratio), 2) AS mean_cpr
  , PERCENTILE_DISC(0.5)
    WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cpr
FROM combined;
1 records
min_child_poverty_ratio max_cpr mean_cpr median_cpr
2.5 51.8 18.78 17.9
/* What district has the lowest cpr? */

SELECT district_name
  , child_poverty_ratio
FROM combined
WHERE child_poverty_ratio = (
  SELECT MIN(child_poverty_ratio)
  FROM combined
);
1 records
district_name child_poverty_ratio
KIRKWOOD R-VII 2.5

/* What district has the highest cpr? */

SELECT district_name
  , child_poverty_ratio
FROM combined
WHERE child_poverty_ratio = (
  SELECT MAX(child_poverty_ratio)
  FROM combined
);
1 records
district_name child_poverty_ratio
SEYMOUR R-II 51.8

/* What district has the median cpr? */

SELECT district_name
  , child_poverty_ratio
FROM combined
WHERE child_poverty_ratio = (
  SELECT PERCENTILE_DISC(0.5)
    WITHIN GROUP(ORDER BY child_poverty_ratio)
  FROM combined
);
4 records
district_name child_poverty_ratio
BRADLEYVILLE R-I 17.9
CARL JUNCTION R-I 17.9
HANNIBAL 60 17.9
UNION STAR R-II 17.9

College Proficiency Category

/* How many districts are there in each college credit determination?*/

SELECT college_credit_determ
    , COUNT(college_credit_determ) AS college_credit_totals
FROM combined
GROUP BY college_credit_determ
ORDER BY CASE
    WHEN college_credit_determ = 'Target' THEN 1
    WHEN college_credit_determ = 'Approaching' THEN 2 
    WHEN college_credit_determ = 'On Track' THEN 3 
    WHEN college_credit_determ = 'Floor' THEN 4 END;;
4 records
college_credit_determ college_credit_totals
Target 310
Approaching 105
On Track 30
Floor 1
/* What district has the highest college credit percentage and what is their child poverty ratio?*/

SELECT  district_name
    , college_credit_pct
    , college_credit_determ
    , child_poverty_ratio
FROM combined
WHERE college_credit_pct = (
    SELECT MAX(college_credit_pct)
    FROM combined
    );
1 records
district_name college_credit_pct college_credit_determ child_poverty_ratio
KIRKWOOD R-VII 103.1 Target 2.5

/* What district has the lowest college credit percentage and what is their child poverty ratio?*/

SELECT  district_name
    , college_credit_pct
    , college_credit_determ
    , child_poverty_ratio
FROM combined
WHERE college_credit_pct = (
    SELECT MIN(college_credit_pct)
    FROM combined
    ); 
    
1 records
district_name college_credit_pct college_credit_determ child_poverty_ratio
OSBORN R-O 3.7 Floor 15.2
    
/* What district has the Median college credit percentage and what is their child poverty ratio?*/

SELECT  district_name
    , college_credit_pct
    , college_credit_determ
    , child_poverty_ratio
FROM combined
WHERE college_credit_pct = (SELECT PERCENTILE_DISC(0.5) 
                            WITHIN GROUP(ORDER BY college_credit_pct) 
                            FROM combined
                           ); 
2 records
district_name college_credit_pct college_credit_determ child_poverty_ratio
PALMYRA R-I 56.7 Target 10.8
ST. JOSEPH 56.7 Target 17.6

/* What is the median, min, max, and mean poverty ratio in each college credit determination?*/

SELECT college_credit_determ
    , PERCENTILE_DISC(0.5)
        WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cp_ratio
    , MIN(child_poverty_ratio) AS min_cpr
    , MAX(child_poverty_ratio) AS max_cpr
    , ROUND(AVG(child_poverty_ratio), 2) AS avg_cpr
FROM combined
GROUP BY college_credit_determ
ORDER BY CASE
    WHEN college_credit_determ = 'Target' THEN 1
    WHEN college_credit_determ = 'Approaching' THEN 2 
    WHEN college_credit_determ = 'On Track' THEN 3 
    WHEN college_credit_determ = 'Floor' THEN 4 END;
4 records
college_credit_determ median_cp_ratio min_cpr max_cpr avg_cpr
Target 17.2 2.5 51.8 17.78
Approaching 20.3 7.1 44.2 22.03
On Track 16.3 7.1 29.0 17.82
Floor 15.2 15.2 15.2 15.20

/* How many districts are in each college credit determination when poverty ratio is <= 7%*/

SELECT college_credit_determ
    , COUNT(college_credit_determ) AS college_credit_totals
    FROM combined
WHERE child_poverty_ratio <= 7
GROUP BY college_credit_determ
ORDER BY CASE
    WHEN college_credit_determ = 'Target' THEN 1
    WHEN college_credit_determ = 'Approaching' THEN 2 
    WHEN college_credit_determ = 'On Track' THEN 3 
    WHEN college_credit_determ = 'Floor' THEN 4 END;
1 records
college_credit_determ college_credit_totals
Target 26

/* At what child poverty level, if any, do more districts not meet the target for college credit than do? Round poverty ratio to nearest whole number. (Spoiler because you can't see the whole list: There are never more districts that don't meet the target than do)*/

WITH test AS (
    SELECT college_credit_determ
        , ROUND(child_poverty_ratio, 0) AS cp_ratio
        , (CASE
            WHEN college_credit_determ = 'Target' THEN 1
            ELSE 0
        END) target
        , (CASE
            WHEN college_credit_determ = 'Target' THEN 0
            ELSE 1
          END) not_target
    FROM combined
    )
SELECT college_credit_determ
        , cp_ratio
        , SUM(target) OVER (
            --PARTITION BY cp_ratio
            ORDER BY cp_ratio
        ) AS cumulative_target
        , SUM(not_target) OVER (
            --PARTITION BY cp_ratio
            ORDER BY cp_ratio
        ) AS cumulative_not_target
FROM test
ORDER BY cp_ratio, college_credit_determ ;
Displaying records 1 - 10
college_credit_determ cp_ratio cumulative_target cumulative_not_target
Target 3 5 0
Target 3 5 0
Target 3 5 0
Target 3 5 0
Target 3 5 0
Target 4 13 0
Target 4 13 0
Target 4 13 0
Target 4 13 0
Target 4 13 0

Post Secondary Placement Category


/*Number of districts in each secondary placement determination*/

SELECT secondary_placement_determ
    , COUNT(secondary_placement_determ) AS secondary_placement_totals
FROM combined
GROUP BY secondary_placement_determ
ORDER BY CASE
    WHEN secondary_placement_determ = 'Target' THEN 1
    WHEN secondary_placement_determ = 'Approaching' THEN 2 
    WHEN secondary_placement_determ = 'On Track' THEN 3 
    WHEN secondary_placement_determ = 'Floor' THEN 4 END;
4 records
secondary_placement_determ secondary_placement_totals
Target 285
Approaching 24
On Track 130
Floor 7

/* What district has the lowest secondary placement percentage and what is their child poverty ratio?*/

SELECT  district_name
    , secondary_placement_pct
    , secondary_placement_determ
    , child_poverty_ratio
FROM combined
WHERE secondary_placement_pct = (
    SELECT MIN(secondary_placement_pct)
    FROM combined
    ); 
    
1 records
district_name secondary_placement_pct secondary_placement_determ child_poverty_ratio
COOPER CO. R-IV 58.9 Floor 15.3

/* What district has the highest secondary placement percentage and what is their child poverty ratio?*/

SELECT  district_name
    , secondary_placement_pct
    , secondary_placement_determ
    , child_poverty_ratio
FROM combined
WHERE secondary_placement_pct = (
    SELECT MAX(secondary_placement_pct)
    FROM combined
    );
    
Displaying records 1 - 10
district_name secondary_placement_pct secondary_placement_determ child_poverty_ratio
ATLANTA C-3 100 Target 11.5
CANTON R-V 100 Target 19.9
CLARK CO. R-I 100 Target 20.3
COLE CO. R-I 100 Target 6.3
FAIRFAX R-III 100 Target 18.9
IRON CO. C-4 100 Target 19.6
MACON CO. R-IV 100 Target 23.5
MONTROSE R-XIV 100 Target 27.3
NEWTOWN-HARRIS R-III 100 Target 25.8
NORTHEAST NODAWAY CO. R-V 100 Target 12.6

/* What is the median, min, max, and mean poverty ratio in each secondary placement determination?*/

SELECT secondary_placement_determ
    , PERCENTILE_DISC(0.5)
        WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cp_ratio
    , MIN(child_poverty_ratio)
    , MAX(child_poverty_ratio)
    , ROUND(AVG(child_poverty_ratio), 2)
FROM combined
GROUP BY secondary_placement_determ
ORDER BY CASE
    WHEN secondary_placement_determ = 'Target' THEN 1
    WHEN secondary_placement_determ = 'Approaching' THEN 2 
    WHEN secondary_placement_determ = 'On Track' THEN 3 
    WHEN secondary_placement_determ = 'Floor' THEN 4 END;
4 records
secondary_placement_determ median_cp_ratio min max round
Target 16.4 2.5 44.2 17.76
Approaching 17.3 11.4 37.5 19.84
On Track 19.5 4.3 51.8 20.49
Floor 19.6 12.3 39.2 24.70

/* How many districts are in each secondary placement determination when poverty ratio is <= 7%*/

SELECT secondary_placement_determ
    , COUNT(secondary_placement_determ) AS secondary_placement_totals
    FROM combined
WHERE child_poverty_ratio <= 7
GROUP BY secondary_placement_determ
ORDER BY CASE
    WHEN secondary_placement_determ = 'Target' THEN 1
    WHEN secondary_placement_determ = 'Approaching' THEN 2 
    WHEN secondary_placement_determ = 'On Track' THEN 3 
    WHEN secondary_placement_determ = 'Floor' THEN 4 END;
2 records
secondary_placement_determ secondary_placement_totals
Target 24
On Track 2

/* At what child poverty level, if any, do more districts not meet the target for secondary placement than do? Round poverty ratio to nearest whole number. (Spoiler because you can't see the whole list: There are never more districts that don't meet the target than do)*/

WITH test AS (
    SELECT secondary_placement_determ
        , ROUND(child_poverty_ratio, 0) AS cp_ratio
        , (CASE
            WHEN secondary_placement_determ = 'Target' THEN 1
            ELSE 0
        END) target
        , (CASE
            WHEN secondary_placement_determ = 'Target' THEN 0
            ELSE 1
          END) not_target
    FROM combined
    )
SELECT secondary_placement_determ
        , cp_ratio
        , SUM(target) OVER (
            ORDER BY cp_ratio
        ) AS cumulative_target
        , SUM(not_target) OVER (
            ORDER BY cp_ratio
        ) AS cumulative_not_target
FROM test
ORDER BY cp_ratio, secondary_placement_determ ;
Displaying records 1 - 10
secondary_placement_determ cp_ratio cumulative_target cumulative_not_target
Target 3 5 0
Target 3 5 0
Target 3 5 0
Target 3 5 0
Target 3 5 0
On Track 4 12 1
Target 4 12 1
Target 4 12 1
Target 4 12 1
Target 4 12 1

Assessment Category


/* How many districts are in each assessment determination?*/

SELECT assessment_determ
    , COUNT(assessment_determ) AS assessment_totals
FROM combined
GROUP BY assessment_determ
ORDER BY CASE
    WHEN assessment_determ = 'Target' THEN 1
    WHEN assessment_determ = 'Approaching' THEN 2 
    WHEN assessment_determ = 'On Track' THEN 3 
    WHEN assessment_determ = 'Floor' THEN 4 END;
4 records
assessment_determ assessment_totals
Target 189
Approaching 150
On Track 104
Floor 3

/* What district has the lowest assessment percentage and what is their child poverty ratio?*/

SELECT  district_name
    , assessment_pct
    , assessment_determ
    , child_poverty_ratio
FROM combined
WHERE assessment_pct = (
    SELECT MIN(assessment_pct)
    FROM combined
    ); 
    
1 records
district_name assessment_pct assessment_determ child_poverty_ratio
RIVERVIEW GARDENS 29.9 Floor 28.4
    
/* What district has the highest assessment percentage and what is their child poverty ratio?*/

SELECT  district_name
    , assessment_pct
    , assessment_determ
    , child_poverty_ratio
FROM combined
WHERE assessment_pct = (
    SELECT MAX(assessment_pct)
    FROM combined
    );
    
1 records
district_name assessment_pct assessment_determ child_poverty_ratio
LADUE 100.3 Target 3.1
    
/* What district has the Median assessment percentage and what is their child poverty ratio?*/

SELECT  district_name
    , assessment_pct
    , assessment_determ
    , child_poverty_ratio
FROM combined
WHERE assessment_pct = (SELECT PERCENTILE_DISC(0.5) 
                            WITHIN GROUP(ORDER BY assessment_pct) 
                            FROM combined
                           );
2 records
district_name assessment_pct assessment_determ child_poverty_ratio
NORTH ST. FRANCOIS CO. R-I 70 On Track 17.8
RICHLAND R-I 70 On Track 23.9

/* What is the median, min, max poverty ratio in each assessment determination?*/

SELECT assessment_determ
    , PERCENTILE_DISC(0.5)
        WITHIN GROUP(ORDER BY child_poverty_ratio) AS median_cp_ratio
    , MIN(child_poverty_ratio)
    , MAX(child_poverty_ratio)
FROM combined
GROUP BY assessment_determ
ORDER BY CASE
    WHEN assessment_determ = 'Target' THEN 1
    WHEN assessment_determ = 'Approaching' THEN 2 
    WHEN assessment_determ = 'On Track' THEN 3 
    WHEN assessment_determ = 'Floor' THEN 4 END;
4 records
assessment_determ median_cp_ratio min max
Target 14.0 2.5 37.7
Approaching 21.9 7.1 51.8
On Track 18.2 7.6 37.0
Floor 31.2 28.4 43.8

/* How many districts are in each assessment determination when poverty ratio is <= 7%*/

SELECT assessment_determ
    , COUNT(assessment_determ) AS assessment_totals
    FROM combined
WHERE child_poverty_ratio <= 7
GROUP BY assessment_determ
ORDER BY CASE
    WHEN assessment_determ = 'Target' THEN 1
    WHEN assessment_determ = 'Approaching' THEN 2 
    WHEN assessment_determ = 'On Track' THEN 3 
    WHEN assessment_determ = 'Floor' THEN 4 END;
1 records
assessment_determ assessment_totals
Target 26

/* At what child poverty level, if any, do more districts not meet the target for assessment than do? Round poverty ratio to nearest whole number.*/

WITH test AS (
    SELECT assessment_determ
        , ROUND(child_poverty_ratio, 0) AS cp_ratio
        , (CASE
            WHEN assessment_determ = 'Target' THEN 1
            ELSE 0
        END) target
        , (CASE
            WHEN assessment_determ = 'Target' THEN 0
            ELSE 1
          END) not_target
    FROM combined
    )
SELECT assessment_determ
        , cp_ratio
        , target
        , not_target
        , SUM(target) OVER (
            --PARTITION BY cp_ratio
            ORDER BY cp_ratio
        ) AS cumulative_target
        , SUM(not_target) OVER (
            --PARTITION BY cp_ratio
            ORDER BY cp_ratio
        ) AS cumulative_not_target
FROM test
ORDER BY cp_ratio, assessment_determ ;
Displaying records 1 - 10
assessment_determ cp_ratio target not_target cumulative_target cumulative_not_target
Target 3 1 0 5 0
Target 3 1 0 5 0
Target 3 1 0 5 0
Target 3 1 0 5 0
Target 3 1 0 5 0
Target 4 1 0 13 0
Target 4 1 0 13 0
Target 4 1 0 13 0
Target 4 1 0 13 0
Target 4 1 0 13 0

/* A child poverty ratio of 24 is the tipping point at which more districts do not meet the target assessment score than do. With that in mind, what percentage of districts have a poverty ratio above 24%? Round poverty ratio to nearest whole number.*/

SELECT SUM(CASE
            WHEN child_poverty_ratio > 24 THEN 1 ELSE 0 END) AS number_above_24
  , ROUND(SUM(CASE
            WHEN child_poverty_ratio > 24 THEN 1 ELSE 0 END)::numeric/COUNT(child_poverty_ratio)*100,0) AS pct_above_24
FROM combined;
1 records
number_above_24 pct_above_24
121 27

Thoughts

This exercise really proves the need for visuals to really understand the data. Just looking at this data, it wouldn’t seem like there is any connection between College and Career readiness and child poverty anywhere except possibly the assessment category. But when you start seeing all of the data visually, you get a better overall picture. If you’d like to see the full report with visuals, you can find it here.

Blog Logo

Melissa Harper


Published

Image

Melissa Harper

Welcome! On this site you'll find information about myself, my tech skills, accomplishments, and some projects I have worked on over the years.

Back to Overview