# HG changeset patch
# User Matthew Wild
# Date 1245602570 -3600
# Node ID 44416491923ebe610d951ac983a8fcea330ee7d5
Initial commit of ndp, the natural date processing library

diff -r 000000000000 -r 44416491923e ndp.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ndp.lua	Sun Jun 21 17:42:50 2009 +0100
@@ -0,0 +1,192 @@
+module(..., package.seeall);
+
+require "luarocks.require"
+require "lpeg"
+
+-- Add case-insensitive string matching to Lpeg: returns a pattern that
+-- matches 's' with each character accepted in either lower or upper case.
+function lpeg.Pi(s)
+	local patt = lpeg.P(true);
+	for c in s:gmatch(".") do
+		patt = patt * (lpeg.P(c:lower()) + lpeg.P(c:upper()));
+	end
+	return patt;
+end
+
+-- Returns an ordered choice that matches, case-insensitively, any of the
+-- strings in 'list'. Entries are tried in list order.
+function lpeg.one_of(list)
+	local patt = lpeg.P(false);
+	for _, match in ipairs(list) do
+		patt = patt + lpeg.Pi(match);
+	end
+	return patt;
+end
+
+-- NOTE: 'ordinal' and 'year' are not used by any pattern in when() yet
+local ordinal = lpeg.P{ lpeg.C(lpeg.R("09")^-2) * (lpeg.Pi("st") + lpeg.Pi("nd") + lpeg.Pi("rd") + lpeg.Pi("th")) + 1 * lpeg.V(1) };
+local number = lpeg.R "09"^1
+
+local day_name = lpeg.one_of {'monday', 'tuesday', 'wednesday',
+                        'thursday', 'friday', 'saturday', 'sunday'}
+-- Weekday numbers as found in the 'wday' field of os.date("*t") (Sunday is 1)
+local day_numbers = { sunday = 1, monday = 2, tuesday = 3, wednesday = 4,
+                        thursday = 5, friday = 6, saturday = 7 }
+
+local month_name = lpeg.one_of {'january', 'february', 'march', 'april', 'may', 'june',
+                        'july', 'august', 'september', 'october', 'november', 'december' }
+-- Month numbers as found in the 'month' field of os.date("*t")
+local month_numbers = { january = 1, february = 2, march = 3, april = 4, may = 5, june = 6,
+                        july = 7, august = 8, september = 9, october = 10, november = 11, december = 12 }
+
+local year = (lpeg.R("09")^4)^-4
+
+local unit_of_time = lpeg.one_of { 'second', 'minute', 'hour', 'day', 'week', 'month', 'year' }
+
+local time_of_day = lpeg.one_of { 'morning', 'noon', 'afternoon', 'evening', 'night', 'midnight' }
+-- Hour of the day that each time_of_day word stands for
+local time_of_days = { morning = 09, noon = 12, afternoon = 13, evening = 17, night = 21, midnight = 00 }
+
+local quantity;
+-- Words that stand for a number, as in "in a few hours"
+local quantities = {
+		["a"] = 1;
+		["an"] = 1;
+
+		["a couple of"] = 2;
+
+		["a few"] = 3;
+		["several"] = 3;
+	};
+
+-- Create 'quantity' to match any of the quantities we know
+do
+	local quantity_list = {};
+	for k in pairs(quantities) do
+		quantity_list[#quantity_list+1] = k;
+	end
+	-- Longest first, so that "a few" is tried before "a"
+	table.sort(quantity_list, function (a,b) return #a>#b; end);
+	quantity = number + lpeg.one_of(quantity_list);
+end
+
+-- Length of each unit of time in seconds (a month counts as 4 weeks, a year as 365 days)
+seconds_in_a = { second = 1 }
+seconds_in_a.minute = seconds_in_a.second * 60;
+seconds_in_a.hour = seconds_in_a.minute * 60;
+seconds_in_a.day = seconds_in_a.hour * 24;
+seconds_in_a.week = seconds_in_a.day * 7;
+seconds_in_a.month = seconds_in_a.week * 4;
+seconds_in_a.year = seconds_in_a.day * 365;
+
+-- Returns one field (such as "year") of os.date("*t", time)
+local function get_time_part(time, part)
+	return os.date("*t", time)[part];
+end
+
+-- Returns 'time' with one field of os.date("*t", time) replaced by 'value'
+local function adjust_time(time, part, value)
+	local split_time = os.date("*t", time);
+
+	split_time[part] = value;
+
+	return os.time(split_time);
+end
+
+-- Returns 'time' moved forward to the next day called 'name', 1 to 7 days
+-- ahead. 'name' is the matched text: any case, optionally prefixed ("on ").
+-- NOTE(review): when() called this helper but nothing defined it; treating
+-- a name equal to the current day as "a week ahead" is an assumption.
+local function find_nearest_day_by_name(time, name)
+	local target = day_numbers[name:lower():match("%a+$")];
+	local days_ahead = (target - get_time_part(time, "wday")) % 7;
+	if days_ahead == 0 then
+		days_ahead = 7;
+	end
+	return time + days_ahead * seconds_in_a.day;
+end
+
+-- Returns 'time' moved forward to the 1st of the next month called 'name',
+-- keeping the time of day. 'name' is the matched text: any case, optionally prefixed ("in ").
+-- NOTE(review): also previously undefined; landing on the 1st (so that a
+-- day such as the 31st cannot overflow into the following month) and
+-- treating the current month as "next year" are assumptions.
+local function find_nearest_month_by_name(time, name)
+	local target = month_numbers[name:lower():match("%a+$")];
+	local split_time = os.date("*t", time);
+	if target <= split_time.month then
+		split_time.year = split_time.year + 1;
+	end
+	split_time.month = target;
+	split_time.day = 1;
+	return os.time(split_time);
+end
+
+-- Works out the time described by 'str'.
+--   str:         text to search for date/time phrases ("tomorrow", "in 2 hours", ...)
+--   relative_to: time to start from, defaults to os.time()
+-- Returns the adjusted time and the value lpeg.match returned for the
+-- last pattern that matched (nil if none matched).
+function when(str, relative_to)
+	local time = relative_to or os.time();
+	local P = lpeg.P;
+
+	-- Each entry holds a pattern whose function capture updates 'time'
+	local patterns =
+		{
+			{ P"tomorrow" /
+				function ()
+					time = time + seconds_in_a.day;
+				end };
+			{ P"next week" /
+				function ()
+					time = time + seconds_in_a.week;
+				end };
+			{ P"next year" /
+				function ()
+					time = adjust_time(time, "year", get_time_part(time, "year") + 1);
+				end };
+			{ P"on "^0 * day_name /
+				function (day_name)
+					time = find_nearest_day_by_name(time, day_name);
+				end };
+			{ P"in "^0 * month_name /
+				function (month_name)
+					time = find_nearest_month_by_name(time, month_name);
+				end };
+			{ P"in "^0 * ( quantity * P" " * unit_of_time ) * (P"s"^-1) /
+				function (number_and_unit)
+					-- The words are matched in any case, but the lookup tables are keyed in lower case
+					local number, unit = number_and_unit:lower():gsub("^in ", ""):match("^(.+)%s+(.-)s?$");
+
+					number = quantities[number] or tonumber(number);
+
+					time = time + seconds_in_a[unit] * number;
+				end };
+			{ (P"this " + P"in the " + P"at ")^0 * time_of_day /
+				function (time_of_day)
+					-- Drop any prefix; lower case to match the keys of time_of_days
+					time_of_day = time_of_day:match("%S+$"):lower();
+					time = adjust_time(time, "hour", time_of_days[time_of_day]);
+					if time_of_day == "noon" or time_of_day == "midnight" then
+						time = adjust_time(time, "min", 00);
+					else
+						time = adjust_time(time, "min", 30);
+					end
+				end };
+		}
+
+	local ret, pos;
+	-- ipairs, so that the patterns are applied in the order listed
+	for _, pattern in ipairs(patterns) do
+		-- Search for the pattern anywhere in str
+		ret = lpeg.match(lpeg.P{ pattern[1] + 1 * lpeg.V(1) }, str);
+		if ret then
+			pos = ret;
+			--print("Matches ".._.." until "..ret);
+		end
+	end
+
+	return time, pos;
+end
+
diff -r 000000000000 -r 44416491923e notes.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/notes.txt	Sun Jun 21 17:42:50 2009 +0100
@@ -0,0 +1,20 @@
+
+when: [[in ]] || [[in ]]
+[[ after ]]
+the
+
+"next" <"week"|day of week|month>
+"tomorrow"
+"on"
+"in"
+<"in"|"after"> <"minutes"|"hours"|"days"|"months"|"years">
+"the" "in"
+ "weeks"
+"on" "in" "weeks"
+
+"on" [date]
+"at" [time]
+
+date:
+time: PM
+
diff -r 000000000000 -r 44416491923e readdate.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readdate.lua	Sun Jun 21 17:42:50 2009 +0100
@@ -0,0 +1,17 @@
+local ndp = require "ndp";
+
+-- Reads phrases from standard input, one per line, and prints the time each
+-- one describes. A line of the form "base:<digits>" sets the time that the
+-- following phrases are relative to (0 until one is given).
+local base;
+for line in io.lines() do
+	if line:match("^base:%d+$") then
+		base = tonumber(line:match("^base:(%d+)$"));
+	else
+		-- when() takes (str, relative_to); base used to be passed as a third
+		-- argument, which when() ignores
+		local ret = ndp.when(line, base or 0);
+		print(os.date("%c", ret).." ("..ret..")");
+	end
+end
+
diff -r 000000000000 -r 44416491923e tests.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests.lua	Sun Jun 21 17:42:50 2009 +0100
@@ -0,0 +1,54 @@
+local when = require "ndp".when;
+
+if not when then
+	print("No 'when' function ?!");
+	os.exit(1); -- nothing below can run without it
+end
+
+local all_ok = true;
+
+-- Compares when(input, 0) with 'output'; on a mismatch prints a FAIL line
+-- and clears all_ok. Returns true on a match, false otherwise.
+local function check(input, output)
+	local ret = when(input, 0);
+	if ret ~= output then
+		print("FAIL: "..input.." [produces "..ret.."/"..os.date("!%c", ret).."]");
+		all_ok = false;
+		return false;
+	end
+	return true;
+end
+
+-- NOTE(review): the time-of-day and day-name expectations presumably assume
+-- a local timezone of UTC+1 (the commit was made at +0100) - confirm
+check("today", 0);
+check("tomorrow", 86400);
+check("tomorrow morning", 117000);
+check("tomorrow Morning", 117000);
+check("tomorrow noon", 126000);
+check("tomorrow afternoon", 131400);
+check("tomorrow evening", 145800);
+check("tomorrow midnight", 82800); -- Correct, but expected? :)
+
+check("in 1 hour", 3600);
+check("in an hour", 3600);
+check("in 2 hours", 7200);
+check("in a couple of hours", 7200);
+check("in 3 hours", 10800);
+check("in a few hours", 10800);
+check("in A Few Hours", 10800);
+
+check("in a day", 86400);
+check("in 2 days", 172800);
+check("in a couple of days", 172800);
+check("in 3 days", 259200);
+check("in a few days", 259200);
+
+check("on friday", 86400);
+
+check("next week", 604800);
+check("next year", 31536000);
+
+if all_ok then
+	print("OK");
+end