using Microsoft SQL Server Management Studio 2012

nuna23
Document55.docx

will cover these queries in lecture; feel free to copy/paste and execute to get a feel for them. These will be similar to the lab questions!

USE IMDB

/* SUBQUERY usage - including multiple subqueries */

/* Titles with a rating greater than five: */

SELECT *

FROM title_ratings

WHERE averageRating > 5;

/* Shows belonging to this group: */

SELECT tconst,

primaryTitle,

startYear

FROM title_basics

WHERE tconst IN

(SELECT tconst

FROM title_ratings

WHERE averageRating > 5);

/* Shows which are musicals or romances: */

SELECT *

FROM title_genre

WHERE genre IN ('Musical', 'Romance');

/* Data for shows belonging to the previous group but not this one: */

SELECT tconst,

primaryTitle,

startYear

FROM title_basics

WHERE tconst IN

(SELECT tconst

FROM title_ratings

WHERE averageRating > 5

)

AND tconst NOT IN

(SELECT tconst

FROM title_genre

WHERE genre IN ('Musical', 'Romance')

);

/* Nested subquery version - functional, but unnecessary (and confusing): */

SELECT tconst,

primaryTitle,

startYear

FROM title_basics

WHERE tconst IN

(SELECT tconst

FROM title_ratings

WHERE averageRating > 5

AND tconst NOT IN

(SELECT tconst

FROM title_genre

WHERE genre IN ('Musical', 'Romance')

)

);

/* More logical nested subquery: names of directors who have worked

on titles with rankings greater than five: */

SELECT DISTINCT name_basics.primaryName

FROM title_directors,

name_basics

WHERE name_basics.nconst = title_directors.nconst

AND title_directors.tconst IN

(SELECT tconst

FROM title_basics

WHERE tconst IN

(SELECT tconst

FROM title_ratings

WHERE averageRating > 5)

);

/* Subquery in the FROM clause - acts as a "virtual table".

Note: must include "AS" label, or you get an error! */

SELECT *

FROM

(SELECT primaryTitle AS MOVIENAME,

runtimeMinutes AS LENGTH

FROM title_basics

WHERE titleType = 'movie'

AND startYear = 1984

) AS FILMSFROM1984

ORDER BY MOVIENAME;

/* Common Table Expressions (aka CTE's) */

/* Create a "virtual table" which calculates final age (questionable results!): */

WITH my_cte AS

(SELECT nconst,

primaryName,

deathYear,

birthYear,

ISNULL(deathYear - birthYear, 0) AS finalAge

FROM name_basics)

SELECT *

FROM my_cte

ORDER BY finalAge DESC;

/* Why can't we include an ORDER BY clause in the CTE? Note the error */

WITH my_cte AS

(SELECT nconst,

primaryName,

deathYear,

birthYear,

ISNULL(deathYear - birthYear, 0) AS finalAge

FROM name_basics

ORDER BY finalAge)

SELECT *

FROM my_cte

ORDER BY finalAge DESC;

/* Use CTE to calculate how may times each person has performed a role;

In outer query, limit results to more than one occurrence: */

WITH profession_cte AS

(SELECT NB.primaryName,

TP.category,

COUNT(*) AS TOTAL

FROM title_principals TP JOIN name_basics NB ON TP.nconst = NB.nconst

GROUP BY NB.primaryName, TP.category)

SELECT *

FROM profession_cte

WHERE TOTAL > 1

ORDER BY primaryName, category;

/* HAVING clause usage */

/* ID's of directors who have worked on more than one film: */

SELECT TD.nconst,

COUNT(*) AS TOTALS

FROM title_directors TD JOIN title_basics TB ON TD.tconst = TB.tconst

GROUP BY TD.nconst

HAVING COUNT(*) > 1

ORDER BY TD.nconst;

/* Names of these directors, along with the names of their films: */

SELECT TD.nconst,

NB.primaryName,

TB.primaryTitle

FROM title_directors TD JOIN title_basics TB ON TD.tconst = TB.tconst

JOIN name_basics NB ON NB.nconst = TD.nconst

WHERE NB.nconst IN

(

SELECT TD.nconst

FROM title_directors TD JOIN title_basics TB ON TD.tconst = TB.tconst

GROUP BY TD.nconst

HAVING COUNT(*) > 1

)

ORDER BY TD.nconst,

NB.primaryName,

TB.primaryTitle;

/* All genres, along with the total number of titles for each: */

SELECT TG.genre,

COUNT(*)

FROM title_genre TG

GROUP BY TG.genre

ORDER BY COUNT(*);

/* Genres whose total number of titles is in a specified range: */

SELECT TG.genre,

COUNT(*)

FROM title_genre TG

GROUP BY TG.genre

HAVING COUNT(*) BETWEEN 50000 AND 100000

ORDER BY COUNT(*);

/* Titles in these genres: */

SELECT TB.primaryTitle,

TG.genre

FROM title_basics TB,

title_genre TG

WHERE TB.tconst = TG.tconst

AND TG.genre IN

(SELECT genre

FROM title_genre TG

GROUP BY TG.genre

HAVING COUNT(*) BETWEEN 50000 AND 100000

)

ORDER BY TB.primaryTitle;

/* Writers born after 1950, and their total number of titles: */

SELECT NB.primaryName,

NB.birthYear,

COUNT(*) AS TITLES

FROM name_basics NB JOIN title_writers TW ON NB.nconst = TW.nconst

WHERE NB.birthYear > 1950

GROUP BY NB.primaryName,

NB.birthYear

ORDER BY NB.primaryName,

NB.birthYear;

/* The same list, limited to totals in a given range: */

SELECT NB.primaryName,

NB.birthYear,

COUNT(*) AS TITLES

FROM name_basics NB JOIN title_writers TW ON NB.nconst = TW.nconst

WHERE NB.birthYear > 1950

GROUP BY NB.primaryName,

NB.birthYear

HAVING COUNT(*) BETWEEN 10 AND 100

ORDER BY NB.primaryName,

NB.birthYear;

/* Directors born after 1950, and their total number of titles: */

SELECT NB.primaryName,

NB.birthYear,

COUNT(*) AS TITLES

FROM name_basics NB JOIN title_directors TD ON NB.nconst = TD.nconst

WHERE NB.birthYear > 1950

GROUP BY NB.primaryName,

NB.birthYear

ORDER BY NB.primaryName,

NB.birthYear;

/* The same - but limit to directors who have written between 10 and 100 titles: */

SELECT NB.primaryName,

NB.birthYear,

COUNT(*) AS TITLES

FROM name_basics NB JOIN title_directors TD ON NB.nconst = TD.nconst

WHERE NB.birthYear > 1950

AND NB.nconst IN

(SELECT NB2.nconst

FROM name_basics NB2 JOIN title_writers TW ON NB2.nconst = TW.nconst

WHERE NB2.birthYear > 1950

GROUP BY NB2.nconst

HAVING COUNT(*) BETWEEN 10 AND 100

)

GROUP BY NB.primaryName,

NB.birthYear

ORDER BY NB.primaryName,

NB.birthYear;

/* How could we display the number of titles written? It's calculated in the

subquery, and not available in the outer SELECT clause...

Join all three tables in the outer query, and calculate totals on

DISTINCT values: */

SELECT NB.primaryName,

NB.birthYear,

COUNT(DISTINCT TD.tconst) AS TITLESDIRECTED,

COUNT(DISTINCT TW.tconst) AS TITLESWRITTEN

FROM name_basics NB JOIN title_directors TD ON NB.nconst = TD.nconst

JOIN title_writers TW ON NB.nconst = TW.nconst

WHERE NB.birthYear > 1950

AND NB.nconst IN

(SELECT TW.nconst

FROM title_writers TW

WHERE NB.nconst = TW.nconst

GROUP BY TW.nconst

HAVING COUNT(*) BETWEEN 10 AND 100

)

GROUP BY NB.primaryName,

NB.birthYear

ORDER BY NB.primaryName,

NB.birthYear;

/* Re-write to eliminate the need for the subquery (title_writers is

already available in the FROM clause): */

SELECT NB.primaryName,

NB.birthYear,

COUNT(DISTINCT TD.tconst) AS TITLESDIRECTED,

COUNT(DISTINCT TW.tconst) AS TITLESWRITTEN

FROM name_basics NB JOIN title_directors TD ON NB.nconst = TD.nconst

JOIN title_writers TW ON NB.nconst = TW.nconst

WHERE NB.birthYear > 1950

GROUP BY NB.primaryName,

NB.birthYear

HAVING COUNT(DISTINCT TW.tconst) BETWEEN 10 AND 100

ORDER BY NB.primaryName,

NB.birthYear;

/* ISNULL and COALESCE */

/* Consider titles with no value in the "originalTitle" column: */

SELECT TB.primaryTitle,

TB.originalTitle

FROM title_basics TB

WHERE originalTitle IS NULL;

/* Replace this empty value with a default: */

SELECT TB.primaryTitle,

ISNULL(TB.originalTitle,'[NONE]') AS ISNULLoriginal

FROM title_basics TB

WHERE originalTitle IS NULL;

/* The same effect, using COALESCE: */

SELECT TB.primaryTitle,

COALESCE(TB.originalTitle,'[NONE]') AS COALESCEoriginal

FROM title_basics TB

WHERE originalTitle IS NULL;

/* COALESCE allows you to list multiple values; the first non-null one gets returned: */

SELECT TB.primaryTitle,

TB.originalTitle,

TB.startYear,

TB.endYear,

COALESCE(TB.originalTitle, STR(TB.startYear), STR(TB.endYear), '[NONE]') AS FirstValue

FROM title_basics TB

WHERE originalTitle IS NULL;

/* Ranking functions */

/* Consider a subset of people: */

SELECT NB.primaryName,

NB.birthYear,

NB.deathYear

FROM name_basics NB

WHERE NB.birthYear = 1935

AND NB.deathYear IS NOT NULL;

/* Calculate rank based on several criteria. Note that display order does

not change their ranking: */

SELECT NB.primaryName,

NB.birthYear,

NB.deathYear,

ROW_NUMBER() OVER (ORDER BY NB.deathYear) AS DeathOrder,

ROW_NUMBER() OVER (ORDER BY NB.primaryName) AS NameOrder

FROM name_basics NB

WHERE NB.birthYear = 1935

AND NB.deathYear IS NOT NULL

ORDER BY 3;

/* The same query using RANK. Notice tie values (and gaps in the sequence): */

SELECT NB.primaryName,

NB.birthYear,

NB.deathYear,

RANK() OVER (ORDER BY NB.deathYear) AS DeathOrder,

RANK() OVER (ORDER BY NB.primaryName) AS NameOrder

FROM name_basics NB

WHERE NB.birthYear = 1935

AND NB.deathYear IS NOT NULL

ORDER BY 3;

/* DENSE_RANK gets rid of the gaps: */

SELECT NB.primaryName,

NB.birthYear,

NB.deathYear,

DENSE_RANK() OVER (ORDER BY NB.deathYear) AS DeathOrder,

DENSE_RANK() OVER (ORDER BY NB.primaryName) AS NameOrder

FROM name_basics NB

WHERE NB.birthYear = 1935

AND NB.deathYear IS NOT NULL

ORDER BY 3;