Employees

These are homework tasks on employees database.

Clone and init the database (requires a local PostgreSQL instance):

psql -U postgres -c 'CREATE DATABASE employees;'
git clone https://github.com/vrajmohan/pgsql-sample-data.git
psql -U postgres -d employees -f pgsql-sample-data/employee/employees.dump

Execute a PRQL query:

cd prql-compiler
cargo run compile examples/employees/average-title-salary.prql | psql -U postgres -d employees

Task 1

rank the employee titles according to the average salary for each department.

My solution:

  • for each employee, find their average salary,
  • join employees with their departments and titles (duplicating employees for each of their titles and departments)
  • group by department and title, aggregating average salary
  • join with department to get department name

PRQL

from salaries
group [emp_no] (
  aggregate [emp_salary = average salary]
)
join t=titles [==emp_no]
join dept_emp side:left [==emp_no]
group [dept_emp.dept_no, t.title] (
  aggregate [avg_salary = average emp_salary]
)
join departments [==dept_no]
select [dept_name, title, avg_salary]

SQL

WITH table_3 AS (
  SELECT
    AVG(salary) AS _expr_0,
    emp_no
  FROM
    salaries
  GROUP BY
    emp_no
),
table_1 AS (
  SELECT
    t.title,
    AVG(table_2._expr_0) AS avg_salary,
    dept_emp.dept_no
  FROM
    table_3 AS table_2
    JOIN titles AS t ON table_2.emp_no = t.emp_no
    LEFT JOIN dept_emp ON table_2.emp_no = dept_emp.emp_no
  GROUP BY
    dept_emp.dept_no,
    t.title
)
SELECT
  departments.dept_name,
  table_0.title,
  table_0.avg_salary
FROM
  table_1 AS table_0
  JOIN departments ON table_0.dept_no = departments.dept_no

Task 2

Estimate distribution of salaries and gender for each department departments.

PRQL

from e=employees
join salaries [==emp_no]
group [e.emp_no, e.gender] (
  aggregate [
    emp_salary = average salaries.salary
  ]
)
join de=dept_emp [==emp_no] side:left
group [de.dept_no, gender] (
  aggregate [
    salary_avg = average emp_salary,
    salary_sd = stddev emp_salary,
  ]
)
join departments [==dept_no]
select [dept_name, gender, salary_avg, salary_sd]

SQL

WITH table_3 AS (
  SELECT
    e.gender,
    AVG(salaries.salary) AS _expr_0,
    e.emp_no
  FROM
    employees AS e
    JOIN salaries ON e.emp_no = salaries.emp_no
  GROUP BY
    e.emp_no,
    e.gender
),
table_1 AS (
  SELECT
    table_2.gender,
    AVG(table_2._expr_0) AS salary_avg,
    STDDEV(table_2._expr_0) AS salary_sd,
    de.dept_no
  FROM
    table_3 AS table_2
    LEFT JOIN dept_emp AS de ON table_2.emp_no = de.emp_no
  GROUP BY
    de.dept_no,
    table_2.gender
)
SELECT
  departments.dept_name,
  table_0.gender,
  table_0.salary_avg,
  table_0.salary_sd
FROM
  table_1 AS table_0
  JOIN departments ON table_0.dept_no = departments.dept_no

Task 3

Estimate distribution of salaries and gender for each manager.

PRQL

from e=employees
join salaries [==emp_no]
group [e.emp_no, e.gender] (
  aggregate [
    emp_salary = average salaries.salary
  ]
)
join de=dept_emp [==emp_no]
join dm=dept_manager [
  (dm.dept_no == de.dept_no) && s"(de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)"
]
group [dm.emp_no, gender] (
  aggregate [
    salary_avg = average emp_salary,
    salary_sd = stddev emp_salary
  ]
)
derive mng_no = emp_no
join managers=employees [==emp_no]
derive mng_name = s"managers.first_name || ' ' || managers.last_name"
select [mng_name, managers.gender, salary_avg, salary_sd]

SQL

WITH table_5 AS (
  SELECT
    e.gender,
    AVG(salaries.salary) AS _expr_0,
    e.emp_no
  FROM
    employees AS e
    JOIN salaries ON e.emp_no = salaries.emp_no
  GROUP BY
    e.emp_no,
    e.gender
),
table_3 AS (
  SELECT
    AVG(table_4._expr_0) AS salary_avg,
    STDDEV(table_4._expr_0) AS salary_sd,
    dm.emp_no
  FROM
    table_5 AS table_4
    JOIN dept_emp AS de ON table_4.emp_no = de.emp_no
    JOIN dept_manager AS dm ON dm.dept_no = de.dept_no
    AND (de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)
  GROUP BY
    dm.emp_no,
    table_4.gender
),
table_1 AS (
  SELECT
    salary_avg,
    salary_sd,
    emp_no
  FROM
    table_3 AS table_2
)
SELECT
  managers.first_name || ' ' || managers.last_name AS mng_name,
  managers.gender,
  table_0.salary_avg,
  table_0.salary_sd
FROM
  table_1 AS table_0
  JOIN employees AS managers ON table_0.emp_no = managers.emp_no

Task 4

Find distributions of titles, salaries and genders for each department.

PRQL

from de=dept_emp
join s=salaries side:left [
  (s.emp_no == de.emp_no),
  s"({s.from_date}, {s.to_date}) OVERLAPS ({de.from_date}, {de.to_date})"
]
group [de.emp_no, de.dept_no] (
  aggregate salary = (average s.salary)
)
join employees [==emp_no]
join titles [==emp_no]
select [dept_no, salary, employees.gender, titles.title]

SQL

WITH table_1 AS (
  SELECT
    de.dept_no,
    AVG(s.salary) AS salary,
    de.emp_no
  FROM
    dept_emp AS de
    LEFT JOIN salaries AS s ON s.emp_no = de.emp_no
    AND (s.from_date, s.to_date) OVERLAPS (de.from_date, de.to_date)
  GROUP BY
    de.emp_no,
    de.dept_no
)
SELECT
  table_0.dept_no,
  table_0.salary,
  employees.gender,
  titles.title
FROM
  table_1 AS table_0
  JOIN employees ON table_0.emp_no = employees.emp_no
  JOIN titles ON table_0.emp_no = titles.emp_no