Wednesday, March 28, 2012

Poor performance for business day calculation from aspfaq sample

Greeting, below is the complete SQL taken from aspfaq.com (retrieved
from this newsgroup I believe) The query takes about two minutes to
run. Does anybody have a better set based way (sub-second response) to
determine business days?

CREATE TABLE dbo.Calendar

(

dt SMALLDATETIME NOT NULL PRIMARY KEY
CLUSTERED, -- Date value

IsWeekday BIT,
-- Is this date a weekday (M -
F)

IsHoliday BIT,
-- Is this date a holiday

Y SMALLINT,
-- Year the date falls in

FY SMALLINT,
-- Fiscal Year (needed?)

Q TINYINT,
-- Quarter date falls in

M TINYINT,
-- Numeric month of date

D TINYINT,
-- Numeric day of date

DW TINYINT,
-- Numeric DayOfWeek
(Sunda=1,Monday=2)

MonthName VARCHAR(9),
-- String name of month

DayName VARCHAR(9),
-- String name of day

W TINYINT
-- Week number

)

GO

-- Start & End Dates

DECLARE @.StartDate DATETIME

DECLARE @.EndDate DATETIME

SET @.StartDate = DATEADD(d, -1, '20000101')

SET @.EndDate = DATEADD(d, -1, '20300101')

-- Total number of dates to generate

DECLARE @.Days INT

SET @.Days = DATEDIFF(DAY, @.StartDate, @.EndDate)

-- Create temporary Numbers table

CREATE TABLE #Numbers

(

Number INT IDENTITY(1, 1) PRIMARY KEY CLUSTERED

)

-- Insert a number into our temp table for each date to be generated

WHILE COALESCE(SCOPE_IDENTITY(), 0) <= @.Days

BEGIN

INSERT #Numbers DEFAULT VALUES

END

-- Generate a date for each day in our timespan

INSERT Calendar(dt)

SELECT DATEADD(DAY, Number, @.StartDate)

FROM #Numbers

WHERE Number <= @.Days

ORDER BY Number

-- Remove the temporary Numbers table

DROP TABLE #Numbers

GO

-- Update other columns

UPDATE dbo.Calendar SET

IsWeekday = CASE WHEN DATEPART(DW, dt) IN (1, 7) THEN 0
ELSE 1 END,

IsHoliday = 0,

Y = YEAR(dt),

FY = YEAR(dt),

Q = CASE

WHEN MONTH(dt) <= 3 THEN 1

WHEN MONTH(dt) <= 6 THEN 2

WHEN MONTH(dt) <= 9 THEN 3

ELSE 4 END,

M = MONTH(dt),

D = DAY(dt),

DW = DATEPART(DW, dt),

MonthName = DATENAME(MONTH, dt),

DayName = DATENAME(DW, dt),

W = DATEPART(WK, dt)

-- Query in question (takes almost 2 minutes to execute and return a
value)

SELECT

C.dt

FROM

Calendar C

WHERE

C.IsWeekDay = 1

AND C.IsHoliday = 0

AND 9 = (SELECT COUNT(*) FROM Calendar C2 WHERE C2.dt >=
GETDATE() AND C2.dt <= C.dt AND C2.IsWeekDay = 1 AND C2.IsHoliday = 0 )Here's an easy one:

SELECT
CASE
WHEN ([Date] % 7) > 1 THEN 'Business day'
ELSE 'Weekend day'
END AS "IsBusinessDay"
FROM table

The symbol % here is the modulo operator.
The remainder of dividing Date by 7 returns
0 for Saturday, 1 for Sunday and up to 6 for Friday.
Remainders from 2 to 6 correspond to Monday to Friday.

GeoSynch

"pb648174" <google@.webpaul.net> wrote in message
news:1113345847.550164.212820@.l41g2000cwc.googlegr oups.com...
> Greeting, below is the complete SQL taken from aspfaq.com (retrieved
> from this newsgroup I believe) The query takes about two minutes to
> run. Does anybody have a better set based way (sub-second response) to
> determine business days?
> CREATE TABLE dbo.Calendar
> (
> dt SMALLDATETIME NOT NULL PRIMARY KEY
> CLUSTERED, -- Date value
> IsWeekday BIT,
> -- Is this date a weekday (M -
> F)
> IsHoliday BIT,
> -- Is this date a holiday
> Y SMALLINT,
> -- Year the date falls in
> FY SMALLINT,
> -- Fiscal Year (needed?)
> Q TINYINT,
> -- Quarter date falls in
> M TINYINT,
> -- Numeric month of date
> D TINYINT,
> -- Numeric day of date
> DW TINYINT,
> -- Numeric DayOfWeek
> (Sunda=1,Monday=2)
> MonthName VARCHAR(9),
> -- String name of month
> DayName VARCHAR(9),
> -- String name of day
> W TINYINT
> -- Week number
> )
> GO
>
> -- Start & End Dates
> DECLARE @.StartDate DATETIME
> DECLARE @.EndDate DATETIME
> SET @.StartDate = DATEADD(d, -1, '20000101')
> SET @.EndDate = DATEADD(d, -1, '20300101')
>
> -- Total number of dates to generate
> DECLARE @.Days INT
> SET @.Days = DATEDIFF(DAY, @.StartDate, @.EndDate)
>
> -- Create temporary Numbers table
> CREATE TABLE #Numbers
> (
> Number INT IDENTITY(1, 1) PRIMARY KEY CLUSTERED
> )
>
> -- Insert a number into our temp table for each date to be generated
> WHILE COALESCE(SCOPE_IDENTITY(), 0) <= @.Days
> BEGIN
> INSERT #Numbers DEFAULT VALUES
> END
>
> -- Generate a date for each day in our timespan
> INSERT Calendar(dt)
> SELECT DATEADD(DAY, Number, @.StartDate)
> FROM #Numbers
> WHERE Number <= @.Days
> ORDER BY Number
>
> -- Remove the temporary Numbers table
> DROP TABLE #Numbers
> GO
>
> -- Update other columns
> UPDATE dbo.Calendar SET
> IsWeekday = CASE WHEN DATEPART(DW, dt) IN (1, 7) THEN 0
> ELSE 1 END,
> IsHoliday = 0,
> Y = YEAR(dt),
> FY = YEAR(dt),
> Q = CASE
> WHEN MONTH(dt) <= 3 THEN 1
> WHEN MONTH(dt) <= 6 THEN 2
> WHEN MONTH(dt) <= 9 THEN 3
> ELSE 4 END,
> M = MONTH(dt),
> D = DAY(dt),
> DW = DATEPART(DW, dt),
> MonthName = DATENAME(MONTH, dt),
> DayName = DATENAME(DW, dt),
> W = DATEPART(WK, dt)
>
>
> -- Query in question (takes almost 2 minutes to execute and return a
> value)
> SELECT
> C.dt
> FROM
> Calendar C
> WHERE
> C.IsWeekDay = 1
> AND C.IsHoliday = 0
> AND 9 = (SELECT COUNT(*) FROM Calendar C2 WHERE C2.dt >=
> GETDATE() AND C2.dt <= C.dt AND C2.IsWeekDay = 1 AND C2.IsHoliday = 0 )|||We already have the isWeekday column in the table - what we need to
know is, how do I add 9 business days to a particular date efficiently?|||On 12 Apr 2005 15:44:07 -0700, pb648174 wrote:

> Greeting, below is the complete SQL taken from aspfaq.com (retrieved
> from this newsgroup I believe) The query takes about two minutes to
> run. Does anybody have a better set based way (sub-second response) to
> determine business days?
[snip]
> -- Query in question (takes almost 2 minutes to execute and return a
> value)
> SELECT
> C.dt
> FROM
> Calendar C
> WHERE
> C.IsWeekDay = 1
> AND C.IsHoliday = 0
> AND 9 = (SELECT COUNT(*) FROM Calendar C2 WHERE C2.dt >=
> GETDATE() AND C2.dt <= C.dt AND C2.IsWeekDay = 1 AND C2.IsHoliday = 0 )

It looks to me like you're looking for the day that is nine business days
from today. At least, once I let the query above run for the two minutes, I
got April 26th when I ran it today (April 13th).

If that's so, then you *know* ahead of time that that day will be greater
than today and less than 60 days from today, right? Unless there's some
weird span of sixty consecutive holidays, anyway. So you can add that fact
to the main where clause, and that will speed it up mightily:

SELECT
C.dt
FROM
Calendar C
WHERE
C.IsWeekDay = 1
AND C.IsHoliday = 0
AND C.dt BETWEEN GETDATE() AND DATEADD(d,60,GETDATE())
AND 9 = (
SELECT COUNT(*) FROM Calendar C2
WHERE C2.dt >= GETDATE()
AND C2.dt <= C.dt
AND C2.IsWeekDay = 1
AND C2.IsHoliday = 0)

Another thing you can do is to add indexes to the IsWeekDay and IsHoliday
columns (which requires changing them from BIT to TINYINT, since BIT can't
be indexed). Or even add a covering index on (DT, IsWeekDay, IsHoliday).
But that won't be necessary if you bound the main SELECT as I did -- I got
subsecond performance for the query above.|||That does execute much faster, but 50 is an arbitray number, and since
I will need to use it to schedule events up to one year or even
multiple years in the future, that won't work. In particular, for a set
of scheduled tasks which have a startdate and a lagdays column, I need
to calculate the end date for those tasks based on the above calendar
with business days/holidays entered. How would I do that with the above
query without the "bounding" limitation, returned as a set and with
decent performance?|||Here are some ideas:

1. This is fast but unfortunately in SQL2000 it can't be parameterized
without dynamic SQL:

SELECT MAX(dt)
FROM
(SELECT TOP 9 dt
FROM Calendar
WHERE dt >= CURRENT_TIMESTAMP
AND isweekday = 1
AND isholiday = 0
ORDER BY dt) AS T

2. This one can be parameterized but I wouldn't generally recommend it
because it relies on undocumented behaviour:

DECLARE @.days INTEGER, @.dt DATETIME

SET @.days = 9
SET ROWCOUNT @.days

SELECT @.dt = dt
FROM Calendar
WHERE dt >= CURRENT_TIMESTAMP
AND isweekday = 1
AND isholiday = 0
ORDER BY dt

SELECT @.dt

3. Extending Ross's suggestion, it shouldn't be difficult to calculate
a sensible upper bound for the query, even based on larger date ranges:

SELECT C.dt
FROM Calendar C
WHERE C.isweekday = 1
AND C.isholiday = 0
AND C.dt BETWEEN GETDATE() AND DATEADD(d,@.days*0.30+60.0,GETDATE())
AND 9 = (
SELECT COUNT(*) FROM Calendar C2
WHERE C2.dt >= GETDATE()
AND C2.dt <= C.dt
AND C2.IsWeekDay = 1
AND C2.IsHoliday = 0)

I think this last method is the best option, together with Ross's
suggestions on index improvements.

--
David Portas
SQL Server MVP
--|||And given that I have a table Called ScheduleTask with columns
StartDate & Duration, how do I for a group of rows, calculate the item
with the largest end date if the duration is based on the above
business days?

Why did you use 30%? Shouldn't it be 70% since I assume you are taking
out what you are guessing will be the weekends and holidays and adding
in a buffer of 60 days?|||On 14 Apr 2005 07:09:32 -0700, pb648174 wrote:

> That does execute much faster, but 50 is an arbitray number, and since
> I will need to use it to schedule events up to one year or even
> multiple years in the future, that won't work. In particular, for a set
> of scheduled tasks which have a startdate and a lagdays column, I need
> to calculate the end date for those tasks based on the above calendar
> with business days/holidays entered. How would I do that with the above
> query without the "bounding" limitation, returned as a set and with
> decent performance?

If you need to get sets, I think the best idea is to number the
business-days in the calendar table.

ALTER dbo.Calendar
ADD BusinessDayNum INT NOT NULL DEFAULT (0)

UPDATE dbo.calendar
SET BusinessDayNum = (
SELECT COUNT(*) FROM calendar C2
WHERE c2.isWeekday=1 and c2.isHoliday=0
AND C2.dt <= Calendar.dt
)
WHERE isWeekday=1 AND isHoliday=0

CREATE INDEX idx_Cal_BDN ON dbo.Calendar (BusinessDayNum)

Now you can do this:

CREATE TABLE #myTable
( startDate DATETIME NOT NULL, DurationDays INT NOT NULL)

INSERT #myTable VALUES ('2003-05-14',10)
INSERT #myTable VALUES ('2003-05-15',12)
INSERT #myTable VALUES ('2003-05-16',14)
INSERT #myTable VALUES ('2004-05-14',10)
INSERT #myTable VALUES ('2004-05-15',12)
INSERT #myTable VALUES ('2004-05-16',14)

SELECT T.startDate, T.DurationDays, C2.dt "endDate"
FROM dbo.Calendar C2, dbo.Calendar C1, #myTable T
WHERE C1.DT = T.startDate
AND C2.BusinessDayNum = C1.BusinessDayNum + T.DurationDays

startDate DurationDays endDate
-------- ---- -------
2003-05-14 00:00:00.000 10 2003-05-28 00:00:00
2003-05-15 00:00:00.000 12 2003-06-02 00:00:00
2003-05-16 00:00:00.000 14 2003-06-05 00:00:00
2004-05-14 00:00:00.000 10 2004-05-28 00:00:00
2004-05-15 00:00:00.000 12 2000-01-18 00:00:00
2004-05-16 00:00:00.000 14 2000-01-20 00:00:00

Unfortunately, you will need to have ALL of your holidays set before you do
this, and if your holidays change you should redo the UPDATE.|||That's why this won't work... We will actually be storing the holidays
per user in a separate table. We are basically trying to duplicate MS
Project functionality and are now thinking we should do it all in
application logic instead of SQL, i.e. port in all the task data, do
all the calculations and then do a couple hundred updates for all of
the task data. SQL gurus, please show me a way to not have to do that...|||"pb648174" <google@.webpaul.net> wrote in message
news:1113487772.592286.157190@.z14g2000cwz.googlegr oups.com...
> That does execute much faster, but 50 is an arbitray number, and since
> I will need to use it to schedule events up to one year or even
> multiple years in the future, that won't work. In particular, for a set
> of scheduled tasks which have a startdate and a lagdays column, I need
> to calculate the end date for those tasks based on the above calendar
> with business days/holidays entered. How would I do that with the above
> query without the "bounding" limitation, returned as a set and with
> decent performance?

Maybe try something like:
AND C.dt BETWEEN startdate AND DATEADD(d,3*duration,startdate)

Good Luck,
Jim|||The design for the Calendar table you have is not that good -- BIT
flags and other proprietary datatypes are a mess. There is no
Julianized day number, teh weekday numbers are nto ISO Standard, etc.

Sit down and design a good encoding for the type of day -- weekend,
holiday, workday, etc. A calendar table for US Secular holidays can be
built from the data at this website, so you will get the three-day
weekends:

http://www.smart.net/~mmontes/ushols.html

I would consider using a spreadsheet to get the raw data, since they
usually have good temporal functions. Try a query like this:

SELECT MIN(C1.cal_date)
FROM Calendar AS C1
WHERE @.workday_count
= (SELECT SUM(CASE WHEN C1.cal_date
BETWEEN CURRENT_TIMESTAMP
AND DATEADD(D,
(@.workday_count *10), CURRENT_TIMESTAMP)
AND day_type = 'workday'
THEN 1 ELSE 0 END);

I am assuming that the cal_date column is a clustered primary key so
the optimizer will know to stop and not scan the rest of the table on
the first match. Multiplying the count by ten is probably excessive,
but certainly safe. if that is too slow, change it to 2 or 3.|||I think this is a sign that SQL just isn't going to be able to handle
this kind of job effectively. There will be user holidays added to this
as well, so it is possible that someone could be out for two weeks or
more and that when adding 1 business day, *10 will not be sufficient. I
thought since I got this example from one of the regular poster's web
site that it would be straightforward to implement but that seems to
not be the case. I think we are just going to pull all the data in and
do the calculation in C# and then write all the data back out. Thanks
for the help everyone.|||I am getting a little lost here. The auxiliary Calendar table has
nothing to do with any particular user; it is for the enterprise as a
whole. A personal timesheet is a differrent matter, since each
employee's will be different.

Did you want to have code that says something like when day X is a
Moslem holiday and employee Y is a Moslem, then X is not a work day for
him? Ramada runs for a lunar month, for example, so if you do:

WHERE @.workday_count
= (SELECT SUM(CASE WHEN C1.cal_date
BETWEEN CURRENT_TIMESTAMP
AND DATEADD(D,
(@.workday_count *10 + 30), CURRENT_TIMESTAMP)
AND secular_day_type = 'workday'
AND relgion_day_type <> 'Moslem'
THEN 1 ELSE 0 END)

You know that you are safe. this same pattern can be extended to
include Mr. Y's vacation, sick and personal leave days with a join to
his personal timesheet. the predicate in the WHEN just gets uglier,
but so will any procedural code that has to have the same logic inside
a loop with IF-THEN statements. Timekeeping is not simple.|||The biggest problem with the query you posted is that the * 10 is not
sufficient for the previously posted reason. What I am saying about the
holidays is that we have an indeterminate amount of users and each will
have their own set of holidays so there will be a separate table for
those holidays and that the Calendar table would have the "master" or
default list of holidays.|||"pb648174" <google@.webpaul.net> wrote in message
news:1113826655.615952.214820@.o13g2000cwo.googlegr oups.com...
> The biggest problem with the query you posted is that the * 10 is not
> sufficient for the previously posted reason. What I am saying about the
> holidays is that we have an indeterminate amount of users and each will
> have their own set of holidays so there will be a separate table for
> those holidays and that the Calendar table would have the "master" or
> default list of holidays.

If you have a calendar table with all of the dates and day types:
(Dt, daytype)

And you have a user table with dates and day types only for those days that
are different
(User, Dt, daytype)

Then you can get bob's calendar with

Select Dt, Coalesce(u.daytype,m.daytype) as daytype
from calendar as m left join usercalendar as u on m.dt = u.dt
Where u.user = 'bob'

Regards,
Jim|||Table joins I don't have a problem with.. My problem is the following,
which I have mentioned in each of the past couple replies:

"There will be user holidays added to this
as well, so it is possible that someone could be out for two weeks or
more and that when adding 1 business day, *10 will not be sufficient."

and

"The biggest problem with the query you posted is that the * 10 is not
sufficient for the previously posted reason"

because a person could have two weeks of vacation, a month of sick
leave, etc. I need something that performs well and doesn't include
some "hacked" factor to make it perform well. The first hack was the
Calendar table, which made this "* x" factor necessary as an additional
hack...

So far I don't see any indication that SQL will be able to handle this
kind of task.|||OK here goes:

1. Create a table (temp or static with ONLY working days and a
contiguous numbering sequence between one day and the next working day)
- that makes it very easy to do the offset.

2. adding the user holiday is easy (find the number of days holiday
between the 2 dates and recalculate)

Sample code (written assuming UK date format - should work with US):

SET NOCOUNT ON

DECLARE @.StartDate datetime
DECLARE @.DaysOffset int

SET @.StartDate = '2005-04-20'
SET @.DaysOffset = 5

DECLARE @.DateWithOffsetNoUserHolidays datetime
DECLARE @.TempDay datetime
DECLARE @.nSequence int
DECLARE @.UserWorkingDayHolidays int
DECLARE @.DateWithOffset datetime

SET @.UserWorkingDayHolidays = 0

CREATE TABLE #WorkingDays ([Date] smalldatetime NOT NULL, [sequence] int
NOT NULL)

SET @.nSequence = 0
SET @.TempDay = '2005-01-01'

WHILE @.TempDay < '2006-12-31'
BEGIN

IF DATEPART(dw, @.TempDay) BETWEEN 2 AND 6 -- AND NOT EXISTS(SELECT 1
FROM BankHolidays WHERE [Date] = @.TempDay
BEGIN
INSERT INTO #WorkingDays VALUES(@.TempDay, @.nSequence)
SET @.nSequence = @.nSequence + 1
END

SELECT @.TempDay = DATEADD(dd,1, @.TempDay)
END

SELECT @.DateWithOffsetNoUserHolidays = [Date]
FROM #WorkingDays
WHERE [Sequence] = (SELECT [Sequence] + @.DaysOffset FROM #WorkingDays
WHERE [Date] = @.StartDate)

/*Get number of user holidays during period between @.StartDate and
@.DateWithOffset and redo:
e.g.
SELECT @.UserWorkingDayHolidays = Count(*)
FROM UserHolidays
WHERE Date BETWEEN @.StartDate AND @.DateWithOffsetNoUserHolidays AND
User = ...
*/

SELECT @.DateWithOffset = [Date]
FROM #WorkingDays
WHERE [Sequence] = (SELECT [Sequence] + @.DaysOffset +
@.UserWorkingDayHolidays FROM #WorkingDays WHERE [Date] = @.StartDate)

DROP TABLE #WorkingDays

SELECT @.DateWithOffset

*** Sent via Developersdex http://www.developersdex.com ***|||My last effort was flawed for the user holiday element of your
requirement. Here's a better effort, but doesn't work for calculating
bussiness days in the past - you'd have to change the penultimate select
for that.

Change @.StartDate and @.BusinessDaysOffset to change the inputs. Should
be sub-second even for a 10

The script build a temporary table of working days the first time it's
run - better to have a permanent table reallty.

Duncan

SET NOCOUNT ON

DECLARE @.StartDate datetime
DECLARE @.BusinessDaysOffset int

SET @.StartDate = '2005-04-21'
SET @.BusinessDaysOffset = 2

IF @.BusinessDaysOffset < 1
RAISERROR('Offset must be > 0',16,1)

/* Create sample temp table for user holidays */
IF OBJECT_ID('tempdb..#UserHolidays') IS NULL
BEGIN
CREATE TABLE #UserHolidays (UserId int NOT NULL, HolidayDate
smalldatetime)

/* Add tomorrow as a holiday for user 1 - date must not contain time -
so that's why there's so many CASTs */
INSERT INTO #UserHolidays VALUES (1, CAST(CAST(CAST(DATEADD(d, 1,
getdate()) as real) as int) as smalldatetime))
END

DECLARE @.TempDay datetime
DECLARE @.nSequence int

/* Get date without time element */
SET @.StartDate = CAST(CAST(CAST(@.StartDate as real) as int) as
smalldatetime)

IF OBJECT_ID('tempdb..#WorkingDays') IS NULL
BEGIN
/* better to use a permanent table as the data doesn't change very
often - but this is just an example */
CREATE TABLE #WorkingDays ([Date] smalldatetime NOT NULL, [sequence]
int NOT NULL)
CREATE UNIQUE INDEX ID_WorkingDays_Date ON #WorkingDays ([Date])

SET @.nSequence = 0
SET @.TempDay = '2005-01-01'

WHILE @.TempDay < '2030-12-31'
BEGIN
/* don't add Saturday or Sunday - or bank holidays from table */
IF DATEPART(dw, @.TempDay) BETWEEN 2 AND 6 /* AND NOT EXISTS(SELECT
1 FROM BankHolidays WHERE [Date] = @.TempDay) */
BEGIN
INSERT INTO #WorkingDays VALUES(@.TempDay, @.nSequence)
SET @.nSequence = @.nSequence + 1
END

SELECT @.TempDay = DATEADD(dd,1, @.TempDay)
END
END
/*Temp table populated */

/* Create temp table to contain working days for the user and after date
specified*/
CREATE TABLE #UserWorkingDays ([Date] smalldatetime NOT NULL)

CREATE UNIQUE INDEX ID_UserWorkingDays_Date ON #UserWorkingDays ([Date])

/*Limit the number of rows to the number of working days required */
SET ROWCOUNT @.BusinessDaysOffset

/* Insert working days after current one where not joined to user's
holiday table*/
INSERT INTO #UserWorkingDays ([Date])
SELECT WD.[Date]
FROM #WorkingDays AS WD
LEFT JOIN #UserHolidays AS UH ON WD.[Date] = UH.HolidayDate AND
UH.UserId = 1
WHERE UH.HolidayDate IS NULL AND [Date] > @.StartDate
ORDER BY WD.[Date]

/* Get last working day */
SELECT MAX([Date])
FROM #UserWorkingDays

DROP TABLE #UserWorkingDays

*** Sent via Developersdex http://www.developersdex.com ***|||I had tried the rowcount solution previously and it did not work for
some (unposted, due to other queries around the calculations and we
will eventually need time) reasons. We have given up on doing this in
SQL and are doing it in C# quite successfully - thank you everyone for
the help.sql

No comments:

Post a Comment