Employees

These are homework tasks on employees database.

Clone and init the database (requires a local PostgreSQL instance):

$ psql -U postgres -c 'CREATE DATABASE employees;'
$ git clone https://github.com/vrajmohan/pgsql-sample-data.git
$ psql -U postgres -d employees -f pgsql-sample-data/employee/employees.dump

Execute a PRQL query:

$ cd prql-compiler
$ cargo run compile examples/employees/average-title-salary.prql | psql -U postgres -d employees

Task 1

rank the employee titles according to the average salary for each department.

My solution:

  • for each employee, find their average salary,
  • join employees with their departments and titles (duplicating employees for each of their titles and departments)
  • group by department and title, aggregating average salary
  • join with department to get department name

PRQL

from salaries
group [emp_no] (
  aggregate [emp_salary = average salary]
)
join t=titles [emp_no]
join dept_emp side:left [emp_no]
group [dept_emp.dept_no, t.title] (
  aggregate [avg_salary = average emp_salary]
)
join departments [dept_no]
select [dept_name, title, avg_salary]

SQL

WITH table_0 AS (
  SELECT
    emp_no,
    AVG(salary) AS emp_salary
  FROM
    salaries
  GROUP BY
    emp_no
),
table_1 AS (
  SELECT
    dept_emp.dept_no,
    t.title,
    AVG(table_0.emp_salary) AS avg_salary
  FROM
    table_0
    JOIN titles AS t USING(emp_no)
    LEFT JOIN dept_emp USING(emp_no)
  GROUP BY
    dept_emp.dept_no,
    t.title
)
SELECT
  departments.dept_name,
  table_1.title,
  table_1.avg_salary
FROM
  table_1
  JOIN departments USING(dept_no)

Task 2

Estimate distribution of salaries and gender for each department departments.

PRQL

from employees
join salaries [emp_no]
group [emp_no, gender] (
  aggregate [
    emp_salary = average salary
  ]
)
join de=dept_emp [emp_no] side:left
group [de.dept_no, gender] (
  aggregate [
    salary_avg = average emp_salary,
    salary_sd = stddev emp_salary,
  ]
)
join departments [dept_no]
select [dept_name, gender, salary_avg, salary_sd]

SQL

WITH table_0 AS (
  SELECT
    emp_no,
    gender,
    AVG(salary) AS emp_salary
  FROM
    employees
    JOIN salaries USING(emp_no)
  GROUP BY
    emp_no,
    gender
),
table_1 AS (
  SELECT
    de.dept_no,
    de.gender,
    AVG(table_0.emp_salary) AS salary_avg,
    STDDEV(table_0.emp_salary) AS salary_sd
  FROM
    table_0
    LEFT JOIN dept_emp AS de USING(emp_no)
  GROUP BY
    de.dept_no,
    de.gender
)
SELECT
  departments.dept_name,
  table_1.gender,
  table_1.salary_avg,
  table_1.salary_sd
FROM
  table_1
  JOIN departments USING(dept_no)

Task 3

Estimate distribution of salaries and gender for each manager.

PRQL

from employees
join salaries [emp_no]
group [emp_no, gender] (
  aggregate [
    emp_salary = average salary
  ]
)
join de=dept_emp [emp_no]
join dm=dept_manager [
  (dm.dept_no == de.dept_no) and s"(de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)"
]
group [dm.emp_no, gender] (
  aggregate [
    salary_avg = average emp_salary,
    salary_sd = stddev emp_salary
  ]
)
derive mng_no = dm.emp_no
join managers=employees [emp_no]
derive mng_name = s"managers.first_name || ' ' || managers.last_name"
select [mng_name, managers.gender, salary_avg, salary_sd]

SQL

WITH table_0 AS (
  SELECT
    emp_no,
    gender,
    AVG(salary) AS emp_salary
  FROM
    employees
    JOIN salaries USING(emp_no)
  GROUP BY
    emp_no,
    gender
),
table_1 AS (
  SELECT
    dm.emp_no,
    gender,
    AVG(table_0.emp_salary) AS salary_avg,
    STDDEV(table_0.emp_salary) AS salary_sd,
    dm.emp_no AS mng_no
  FROM
    table_0
    JOIN dept_emp AS de USING(emp_no)
    JOIN dept_manager AS dm ON dm.dept_no = de.dept_no
    AND (de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)
  GROUP BY
    dm.emp_no,
    gender
)
SELECT
  managers.first_name || ' ' || managers.last_name AS mng_name,
  managers.gender,
  table_1.salary_avg,
  table_1.salary_sd
FROM
  table_1
  JOIN employees AS managers USING(emp_no)

Task 4

Find distributions of titles, salaries and genders for each department.

PRQL

from de=dept_emp
join s=salaries side:left [
  (s.emp_no == de.emp_no),
  s"({s.from_date}, {s.to_date}) OVERLAPS ({de.from_date}, {de.to_date})"
]
group [de.emp_no, de.dept_no] (
  aggregate salary = (average s.salary)
)
join employees [emp_no]
join titles [emp_no]
select [dept_no, salary, employees.gender, titles.title]

SQL

WITH table_0 AS (
  SELECT
    de.emp_no,
    de.dept_no,
    AVG(s.salary) AS salary
  FROM
    dept_emp AS de
    LEFT JOIN salaries AS s ON s.emp_no = de.emp_no
    AND (s.from_date, s.to_date) OVERLAPS (de.from_date, de.to_date)
  GROUP BY
    de.emp_no,
    de.dept_no
)
SELECT
  table_0.dept_no,
  table_0.salary,
  employees.gender,
  titles.title
FROM
  table_0
  JOIN employees USING(emp_no)
  JOIN titles USING(emp_no)