// ==UserScript==
// @name          hCard Parser
// @version       2005-09-15
// @namespace     http://diveintomark.org/projects/greasemonkey/
// @description   convert hCards into vCards (example program with unit tests)
// @include       *
// ==/UserScript==

/* BEGIN LICENSE BLOCK
Copyright (C) 2005 Mark Pilgrim

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You can download a copy of the GNU General Public License at
http://diveintomark.org/projects/greasemonkey/COPYING
or get a free printed copy by writing to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

END LICENSE BLOCK */

// Codes telling getPropertyValue() how to read a value out of a matching
// element. They are only ever compared against each other.
const PropertyType = {
    // text content (or abbr title / img alt) of the element
    STRING: 1,
    // text that is run through Date.parse and returned as a Date
    DATE: 2,
    // href / src / data attribute of a, img or object elements
    URI: 3,
    // the matching DOM element itself, unprocessed
    NODE: 4,
    // like STRING, but an <a href="mailto:..."> yields the bare address
    EMAIL: 5
}

// Report whether any element of this array is loosely equal (==) to
// sString. Elements are examined from index 0 upwards and the scan stops
// at the first hit.
Array.prototype.contains = function(sString) {
    var iPos = 0;
    while (iPos < this.length) {
        if (this[iPos] == sString) {
            return true;
        }
        iPos += 1;
    }
    return false;
};

// Return a new array holding the elements of this array with duplicates
// removed. The first occurrence of each value is kept and the original
// order is preserved. Elements are compared by their string form, so 1
// and '1' count as the same value.
Array.prototype.unique = function() {
    var arResults = [];
    var oSeen = {};
    for (var i = 0; i < this.length; i++) {
        var vItem = this[i];
        // Prefix the key so values such as 'constructor', 'toString' or
        // '__proto__' cannot collide with members inherited from
        // Object.prototype (which previously made them look like
        // duplicates, or prevented them from ever being recorded).
        var sKey = '$' + String(vItem);
        if (Object.prototype.hasOwnProperty.call(oSeen, sKey)) { continue; }
        oSeen[sKey] = 1;
        arResults.push(vItem);
    }
    return arResults;
};

// Replace every occurrence of the literal text sOld in this string with
// sNew. Matching is case-insensitive and global; regular-expression
// metacharacters in sOld are escaped so they match themselves. The matched
// text is wrapped in a capture group, so sNew may refer to it as $1.
String.prototype.replaceString = function(sOld, sNew) {
    var sMetaChars = '\\[]().*+^$?|{}';
    var sEscaped = '';
    for (var iChar = 0, iLen = sOld.length; iChar < iLen; iChar++) {
        var sChar = sOld.charAt(iChar);
        sEscaped += (sMetaChars.indexOf(sChar) == -1) ? sChar : '\\' + sChar;
    }
    return this.replace(new RegExp('(' + sEscaped + ')', 'gim'), sNew);
};

// Left-pad this string: cPadder is prepended once for every character
// this string falls short of iMaxLen. A string that is already long
// enough is returned unchanged.
String.prototype.lpad = function(cPadder, iMaxLen) {
    var sPadded = this;
    var iMissing = iMaxLen - sPadded.length;
    while (iMissing > 0) {
        sPadded = cPadder + sPadded;
        iMissing -= 1;
    }
    return sPadded;
};

// True when the last occurrence of sMatch in this string sits exactly at
// the end of the string.
String.prototype.endswith = function(sMatch) {
    var iLast = this.lastIndexOf(sMatch);
    if (iLast == -1) {
        return false;
    }
    return iLast == this.length - sMatch.length;
};

// True when sMatch occurs at position 0 of this string.
String.prototype.startswith = function(sMatch) {
    // Searching backwards from index 0 can only ever find a match at 0.
    var iAtStart = this.lastIndexOf(sMatch, 0);
    return iAtStart == 0;
};

// Return this string with leading and trailing whitespace (as matched by
// \s) removed. Interior whitespace is left untouched.
String.prototype.trim = function() {
    var sNoLead = this.replace(/^\s+/, '');
    return sNoLead.replace(/\s+$/, '');
};

// Collapse every run of whitespace in this string to a single space and
// strip whitespace from both ends.
// NOTE: on engines that ship a built-in String.prototype.normalize
// (Unicode normalization), this assignment replaces it.
String.prototype.normalize = function() {
    var arPieces = this.split(/\s+/);
    return arPieces.join(' ').trim();
};

// Return the part of this string that precedes the first occurrence of
// sDelimeter, or the whole string when the delimiter does not occur.
String.prototype.substringBefore = function(sDelimeter) {
    var iCut = this.indexOf(sDelimeter);
    return (iCut == -1) ? this : this.substring(0, iCut);
};

// Return the part of this string that follows the first occurrence of
// sDelimeter, or the whole string when the delimiter does not occur.
String.prototype.substringAfterFirst = function(sDelimeter) {
    var iFound = this.indexOf(sDelimeter);
    if (iFound == -1) {
        return this;
    }
    return this.substring(iFound + sDelimeter.length);
};

// Return the part of this string that follows the last occurrence of
// sDelimeter, or the whole string when the delimiter does not occur.
String.prototype.substringAfterLast = function(sDelimeter) {
    var iFound = this.lastIndexOf(sDelimeter);
    if (iFound == -1) {
        return this;
    }
    return this.substring(iFound + sDelimeter.length);
};

// Case-insensitive test: true as soon as one entry of arKeywords occurs
// somewhere inside this string, false when none does (or the list is
// empty). Keywords are tried in list order.
String.prototype.containsAny = function(arKeywords) {
    var sHaystack = this.toLowerCase();
    var iKeyword = 0;
    while (iKeyword < arKeywords.length) {
        var sNeedle = arKeywords[iKeyword].toLowerCase();
        if (sHaystack.indexOf(sNeedle) >= 0) {
            return true;
        }
        iKeyword += 1;
    }
    return false;
};

// Format this date in UTC as YYYY-MM-DDTHH:MM:SSZ. Month, day, hours,
// minutes and seconds are zero-padded to two digits with lpad(); the year
// is emitted as-is.
Date.prototype.toISO8601 = function() {
    var arDateParts = [
        this.getUTCFullYear(),
        (this.getUTCMonth() + 1).toString().lpad('0', 2),
        this.getUTCDate().toString().lpad('0', 2)
    ];
    var arTimeParts = [
        this.getUTCHours().toString().lpad('0', 2),
        this.getUTCMinutes().toString().lpad('0', 2),
        this.getUTCSeconds().toString().lpad('0', 2)
    ];
    return arDateParts.join('-') + 'T' + arTimeParts.join(':') + 'Z';
};

// Keep a handle on the engine's own parser; the replacement below tries
// it first.
var _parse = Date.parse;

// Replacement for Date.parse that falls back to a lenient W3DTF-style
// parser when the native one cannot read the string.
//
// sDate - the date string to parse
//
// Returns the time in milliseconds since the Unix epoch, or null when
// neither parser understands the input.
Date.parse = function(sDate) {
    // Parse YYYY[?MM[?DD[?HH[?MM[?SS]]]]] where ? is any single non-digit
    // separator, optionally followed by a +HH?MM / -HH?MM zone offset at
    // the very end of the string. Missing month/day default to 1, missing
    // time fields to 0. Without an offset the fields are taken as UTC.
    // Returns NaN when the string does not start with a 4-digit year.
    function parseW3DTF(sDate) {
        sDate = String(sDate || '').toUpperCase();
        var match = /^(\d{4})(\D(\d{2})(\D(\d{2})(\D(\d{2})(\D(\d{2})(\D(\d{2}))?)?)?)?)?/.exec(sDate);
        if (!match) { return NaN; }
        var year = +match[1];
        var month = +match[3] || 1;
        var day = +match[5] || 1;
        var hour = +match[7] || 0;
        var minute = +match[9] || 0;
        var second = +match[11] || 0;

        // Only look for the zone offset in what follows the date/time
        // fields, so a trailing "-MM-DD" is never mistaken for one.
        var sRemainder = sDate.substring(match[0].length);
        var tzMatch = /([+-])(\d{2})\D(\d{2})$/.exec(sRemainder);
        var tzOffsetMinutes = 0;
        if (tzMatch) {
            // Both the hour and the minute part of the offset count
            // (e.g. +05:30 is 330 minutes ahead of UTC).
            tzOffsetMinutes = (+tzMatch[2]) * 60 + (+tzMatch[3]);
            if (tzMatch[1] == '-') {
                tzOffsetMinutes = -tzOffsetMinutes;
            }
        }

        var date = new Date(0);
        date.setUTCFullYear(year, month - 1, day);
        // Local time minus its offset gives UTC; out-of-range minutes
        // roll over into hours/days automatically.
        date.setUTCHours(hour, minute - tzOffsetMinutes, second);
        return date.getTime();
    }

    var dateParsed = _parse(sDate);
    // Test for NaN explicitly: a timestamp of 0 (the epoch itself) is a
    // valid result and must not be mistaken for a parse failure.
    if (isNaN(dateParsed)) {
        dateParsed = parseW3DTF(sDate);
    }
    return isNaN(dateParsed) ? null : dateParsed;
};

// Namespace resolver handed to document.evaluate(): maps the 'xhtml'
// prefix to the XHTML namespace URI and yields undefined for any other
// prefix.
function NSResolver(prefix) {
    var oNamespaces = {'xhtml': 'http://www.w3.org/1999/xhtml'};
    // Own-property check so that prefixes like 'toString' or 'constructor'
    // do not resolve to members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(oNamespaces, prefix)) {
        return oNamespaces[prefix];
    }
    return undefined;
}

// Escape a text value for use inside a vCard content line.
//
// s - the value to escape; anything that is not a string is returned
//     untouched (e.g. Date objects or DOM nodes)
//
// Returns the string with backslash, comma, semicolon and newline
// replaced by their backslash-escaped forms.
function vcardEscape(s) {
    if (typeof s != 'string') {
        return s;
    }
    // The backslash must be handled first, otherwise the backslashes added
    // by the later replacements would themselves get doubled.
    return s.replace(/\\/g, '\\\\')
            .replace(/,/g, '\\,')
            .replace(/;/g, '\\;')
            .replace(/\n/g, '\\n');
}

// Prepare one vCard content line for output.
//
// s - the complete, already escaped content line (e.g. 'N:Doe;John;;;')
//
// Trailing ';' separators (empty trailing components) are removed, then
// the line is folded: the first physical line holds at most 75
// characters, and every continuation line is a single space followed by
// at most 74 characters. Physical lines are joined with '\n'.
//
// Returns the folded line as a string.
function vcardFold(s) {
    // Strip trailing structural separators. A ';' preceded by an odd
    // number of backslashes is an escaped, literal semicolon belonging to
    // the value, so stripping stops there instead of leaving a dangling
    // backslash behind.
    var iEnd = s.length;
    while (iEnd > 0 && s.charAt(iEnd - 1) == ';') {
        var iBackslashes = 0;
        while (iBackslashes < iEnd - 1 && s.charAt(iEnd - 2 - iBackslashes) == '\\') {
            iBackslashes++;
        }
        if (iBackslashes % 2 == 1) { break; }
        iEnd--;
    }
    s = s.substring(0, iEnd);

    var sFolded = '';
    var iMax = 75;
    var sPrefix = '';
    while (s.length > iMax) {
        sFolded += sPrefix + s.substring(0, iMax) + '\n';
        s = s.substring(iMax);
        // Continuation lines start with one space, leaving room for 74
        // characters of payload.
        sPrefix = ' ';
        iMax = 74;
    }
    sFolded += sPrefix + s;
    return sFolded;
}

// Extract the value(s) of one hCard property from the elements below
// elmRoot.
//
// elmRoot        - context node for the XPath searches
// sProperty      - class name to look for (lowercased before use)
// iPropertyType  - a PropertyType code selecting how a value is read from
//                  a matching element
// bAllowMultiple - true to collect every usable match into an array,
//                  false to return the first usable one
// bAutoEscape    - true to pass each value through vcardEscape()
//
// Returns, when bAllowMultiple is true, an array (possibly empty) of
// values. Otherwise returns the first value found; when nothing matches
// that is '' for STRING and URI, null for DATE, {} for NODE and null for
// any other type. If elements match but none yields a usable value, the
// (empty) array is returned even when bAllowMultiple is false. Values are
// strings, except Date objects for DATE and DOM elements for NODE.
function getPropertyValue(elmRoot, sProperty, iPropertyType, bAllowMultiple, bAutoEscape) {
    sProperty = sProperty.toLowerCase();
    // documents with a namespace need the xhtml: prefix in XPath name tests
    var sPrefix = document.documentElement.namespaceURI ? 'xhtml:' : '';
    var bFound = false;
    var bNormalize = true;
    // first choice for multi-valued properties: the <li> children of a
    // <ul>/<ol> that carries the property's class name
    if (bAllowMultiple && iPropertyType != PropertyType.NODE) {
	var xpath = ".//" + sPrefix + "ul[contains(concat(' ', normalize-space(@class), ' '), ' " + sProperty + " ')]/" + sPrefix + "li|.//" + sPrefix + "ol[contains(concat(' ', normalize-space(@class), ' '), ' " + sProperty + " ')]/" + sPrefix + "li";
        var snapResults = document.evaluate(xpath, elmRoot, NSResolver, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
	bFound = (snapResults.snapshotLength != 0);
    }
    // otherwise: any descendant element whose class list contains the name
    if (!bFound) {
	var xpath = ".//" + sPrefix + "*[contains(concat(' ', normalize-space(@class) ,' '), ' " + sProperty + " ')]";
	var snapResults = document.evaluate(xpath, elmRoot, NSResolver, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
	bFound = (snapResults.snapshotLength != 0);
    }
    // 'value' without an explicit class="value" descendant: fall back to
    // descendant <pre> elements (whose whitespace is then only trimmed,
    // not collapsed), and finally to elmRoot itself
    if (!bFound && sProperty == 'value') {
	xpath = ".//" + sPrefix + "pre";
	snapResults = document.evaluate(xpath, elmRoot, NSResolver, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
	bFound = (snapResults.snapshotLength != 0);
	bNormalize = !bFound;
	if (!bFound) {
	    xpath = ".";
	    snapResults = document.evaluate(xpath, elmRoot, NSResolver, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
	    bFound = (snapResults.snapshotLength != 0);
	}
    }
    // when looking for vcards, collect every vcard in the document that is
    // nested inside another vcard so it can be excluded from the results
    var arFilter = [];
    if (sProperty == 'vcard') {
	var snapFilter = document.evaluate("//*[contains(concat(' ', normalize-space(@class), ' '), ' vcard ')][ancestor::*[contains(concat(' ', normalize-space(@class), ' '), ' vcard ')]]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
	for (var i = 0; i < snapFilter.snapshotLength; i++) {
	    arFilter.push(snapFilter.snapshotItem(i));
	}
    }
    var arResults = [];
    for (var i = 0; i < snapResults.snapshotLength; i++) {
	var elmResult = snapResults.snapshotItem(i);
	if (!arFilter.contains(elmResult)) {
	    arResults.push(elmResult);
	}
    }
    bFound = (arResults.length != 0);
    // nothing matched: return the "empty" value for the requested shape
    if (!bFound) {
	if (bAllowMultiple) { return []; }
	switch (iPropertyType) {
	case PropertyType.STRING: return '';
	case PropertyType.DATE: return null;
	case PropertyType.URI: return '';
	case PropertyType.NODE: return {};
	}
	return null;
    }
    var arValues = [];
    for (var i = 0; i < arResults.length; i++) {
	var elmResult = arResults[i];
	var sValue = null;
	// NODE: hand back the element itself, no text extraction
	if (iPropertyType == PropertyType.NODE) {
	    if (bAllowMultiple) {
		arValues.push(elmResult);
		continue;
	    } else {
		return elmResult;
	    }
	}
	// Text extraction: each step below only runs while sValue is still
	// empty, so the order of the steps defines their precedence.
	var sNodeName = elmResult.nodeName.toLowerCase();
	// 1. EMAIL on a link: the href without 'mailto:' and without any
	//    '?...' part
	if (iPropertyType == PropertyType.EMAIL && sNodeName == 'a') {
	    sValue = (elmResult.href || '').substringAfterFirst('mailto:').substringBefore('?');
	}
	if (sValue) { sValue = bNormalize ? sValue.normalize() : sValue.trim(); }
	// 2. <abbr>: the title attribute
	if (!sValue && sNodeName == 'abbr') {
	    sValue = elmResult.title;
	}
	if (sValue) { sValue = bNormalize ? sValue.normalize() : sValue.trim(); }
	// 3. URI: the element's link-like attribute
	if (!sValue && iPropertyType == PropertyType.URI) { 
	    switch (sNodeName) {
	    case 'a': sValue = elmResult.href; break;
	    case 'img': sValue = elmResult.src; break;
	    case 'object': sValue = elmResult.data; break;
	    }
	}
	if (sValue) { sValue = bNormalize ? sValue.normalize() : sValue.trim(); }
	// 4. <img>: the alt text
	if (!sValue && sNodeName == 'img') {
	    sValue = elmResult.alt;
	}
	if (sValue) { sValue = bNormalize ? sValue.normalize() : sValue.trim(); }
	// 5. last resort: the element's markup with tags stripped and line
	//    endings unified to \n
	if (!sValue) {
	    // elmResult.textContent is what we really want here, but it crashes
	    // mysteriously in GM 0.5.3 under FF 1.0.6
	    sValue = elmResult.innerHTML.replace(/<\S[^>]*>/g, '').replace(/\r\n/g, '\n').replace(/\r/g, '\n');
	}
	if (sValue) { sValue = bNormalize ? sValue.normalize() : sValue.trim(); }
	// elements that yield no text at all are skipped
	if (!sValue) { continue; }
	// DATE: convert the text to a Date; unparsable text is skipped
	if (iPropertyType == PropertyType.DATE) {
	    var iParsedDate = Date.parse(sValue);
	    if (!iParsedDate) { continue; }
	    var sValue = new Date(0);
	    sValue.setTime(iParsedDate);
	}
	if (bAllowMultiple) {
	    arValues.push(bAutoEscape ? vcardEscape(sValue) : sValue);
	}
	else {
	    return bAutoEscape ? vcardEscape(sValue) : sValue;
	}
    }
    return arValues;
}

// Convert hCards to vCard 3.0 text.
//
// elmRoot - optional; a single class="vcard" element to convert. When
//           omitted, every vcard element found in the document by
//           getPropertyValue() is converted (vcards nested inside another
//           vcard are excluded there).
//
// Each card is processed on a deep clone, so the destructive AGENT step
// does not modify the page. A card contributes a BEGIN:vCard ... END:vCard
// block only if at least one property line was produced for it.
//
// Returns all generated vCards as one string with lines separated by
// '\n', trimmed; the empty string when nothing was generated.
function findVCards(elmRoot) {
    var sVCards = '';
    if (!elmRoot) {
	var arCards = getPropertyValue(document, 'vcard', PropertyType.NODE, true, false);
    } else {
	var arCards = [elmRoot];
    }
    for (var i = 0; i < arCards.length; i++) {
	var arLines = [];
	var elmCard = arCards[i].cloneNode(true);
	
	// Emit "PROPERTY:value" for a single-valued text property of the
	// current card, if present. Returns the (escaped) value or ''.
	function processSingleString(sProperty) {
	    var sValue = getPropertyValue(elmCard, sProperty, PropertyType.STRING, false, true);
	    if (sValue) {
		arLines.push(vcardFold(sProperty.toUpperCase() + ':' + sValue));
	    }
	    return sValue || '';
	}
	
	// Emit a single-valued URI property of the current card, if present.
	// data: URIs are written as inline data with ENCODING=b and a TYPE
	// taken from the media subtype; other URIs get VALUE=uri (except for
	// 'url') and a TYPE taken from the element's type attribute.
	function processSingleURI(sProperty) {
	    var sValue = getPropertyValue(elmCard, sProperty, PropertyType.URI, false, false);
	    if (sValue) {
		var sContentType = '';
		var sEncoding = '';
		var sValueKey = '';
		if (sValue.startswith('data:')) {
		    sEncoding = ';ENCODING=b';
		    sContentType = sValue.substringBefore(';').substringAfterLast('/');
		    sValue = sValue.substringAfterFirst(',');
		} else {
		    var elmValue = getPropertyValue(elmCard, sProperty, PropertyType.NODE, false, false);
		    if (elmValue) {
			if (sProperty != 'url') {
			    sValueKey = ';VALUE=uri';
			}
			sContentType = (elmValue.type || '').trim().substringAfterLast('/').trim();
		    }
		}
		sContentType = sContentType.toUpperCase();
		if (sContentType == 'OCTET-STREAM') {
		    sContentType = '';
		}
		if (sContentType) {
		    sContentType = ';TYPE=' + sContentType.toUpperCase();
		}
		arLines.push(vcardFold(sProperty.toUpperCase() + sEncoding + sContentType + sValueKey + ':' + sValue));
	    }
	}

	// Emit one "PROPERTY;TYPE=...:value" line per matching element of the
	// current card. The types come from class="type" descendants;
	// arForceType (optional array) is always put in front of them, and
	// arDefaultType is used when the resulting list is empty. The value
	// is read with PropertyType.EMAIL, so mailto: links are unwrapped.
	function processTypeValue(sProperty, arDefaultType, arForceType) {
	    var arResults = getPropertyValue(elmCard, sProperty, PropertyType.NODE, true, false);
	    for (var iResultsIndex = 0; iResultsIndex < arResults.length; iResultsIndex++) {
		var elmResult = arResults[iResultsIndex];
		var arType = getPropertyValue(elmResult, 'type', PropertyType.STRING, true, true);
		if (typeof arForceType == 'object') {
		    arType = arForceType.concat(arType).unique();
		}
		if (arType.length == 0) {
		    arType = arDefaultType;
		}
		var sValue = getPropertyValue(elmResult, 'value', PropertyType.EMAIL, false, true);
		if (sValue) {
		    arLines.push(vcardFold(sProperty.toUpperCase() + ';TYPE=' + arType.join(',') + ':' + sValue));
		}
	    }
	}
	
	// AGENT
	// must do this before all other properties because it is destructive
	// (removes nested class="vcard" nodes so they don't interfere with
	// this vcard's other properties)
	var arAgent = getPropertyValue(elmCard, 'agent', PropertyType.NODE, true, false);
	for (var iAgentIndex = 0; iAgentIndex < arAgent.length; iAgentIndex++) {
	    var elmAgent = arAgent[iAgentIndex];
	    if ((' ' + elmAgent.className.normalize() + ' ').toLowerCase().indexOf(' vcard ') != -1) {
		// agent that is itself an hCard: convert it recursively and
		// embed the result with newlines and semicolons escaped
		var sAgentValue = (findVCards(elmAgent) + '\n').replace(/\n/g, '\\n').replace(/;/g, '\\;');
		if (sAgentValue) {
		    arLines.push(vcardFold('AGENT:' + sAgentValue));
		}
		elmAgent.parentNode.removeChild(elmAgent);
	    } else {
		var sAgentValue = getPropertyValue(elmAgent, 'value', PropertyType.URI, false, true);
		if (sAgentValue) {
		    arLines.push(vcardFold('AGENT;VALUE=uri:' + sAgentValue));
		}
	    }
	}

	// FN (full name)
	var sFN = processSingleString('fn');
	
	// N (name)
	// getPropertyValue returns {} (no nodeName) when there is no "n" element
	var elmName = getPropertyValue(elmCard, 'n', PropertyType.NODE, false, false);
	if (elmName.nodeName) {
	    var sFamilyName = getPropertyValue(elmName, 'family-name', PropertyType.STRING, false, true);
	    var sGivenName = getPropertyValue(elmName, 'given-name', PropertyType.STRING, false, true);
	    var arAdditionalNames = getPropertyValue(elmName, 'additional-name', PropertyType.STRING, true, true).concat(getPropertyValue(elmName, 'additional-names', PropertyType.STRING, true, true));
	    var arHonorificPrefixes = getPropertyValue(elmName, 'honorific-prefix', PropertyType.STRING, true, true).concat(getPropertyValue(elmName, 'honorific-prefixes', PropertyType.STRING, true, true));
	    var arHonorificSuffixes = getPropertyValue(elmName, 'honorific-suffix', PropertyType.STRING, true, true).concat(getPropertyValue(elmName, 'honorific-suffixes', PropertyType.STRING, true, true));
	    arLines.push(vcardFold('N:' + sFamilyName + ';' + 
				   sGivenName + ';' +
				   arAdditionalNames.join(',') + ';' +
				   arHonorificPrefixes.join(',') + ';' +
				   arHonorificSuffixes.join(',')));
	} else if (sFN.length > 0) {
	    // implied "N" optimization
	    // http://microformats.org/wiki/hcard#Implied_.22N.22_Optimization
	    // Only applied when FN consists of exactly two words.
	    // NOTE(review): in the family-name-first case the first word is
	    // emitted as-is, including its trailing (escaped) comma --
	    // confirm whether the comma should be removed.
	    var arNames = sFN.normalize().split(' ');
	    if (arNames.length == 2) {
		var bFamilyNameFirst = (arNames[0].endswith(',') ||
				      arNames[1].length == 1 || 
				      (arNames[1].length == 2 && arNames[1].endswith('.')));
		if (bFamilyNameFirst) {
		    arLines.push(vcardFold('N:' + arNames[0] + ';' + arNames[1]));
		} else {
		    arLines.push(vcardFold('N:' + arNames[1] + ';' + arNames[0]));
		}
	    }
	}

	// SORT-STRING
	var sSortString = getPropertyValue(elmCard, 'sort-string', PropertyType.STRING, false, true);
	if (sSortString) {
	    arLines.push(vcardFold('SORT-STRING:' + sSortString));
	}
	
	// NICKNAME
	var arNickname = getPropertyValue(elmCard, 'nickname', PropertyType.STRING, true, true);
	if (arNickname.length != 0) {
	    arLines.push(vcardFold('NICKNAME:' + arNickname.join(',')));
	}
	
	// PHOTO
	processSingleURI('photo');
	
	// BDAY
	var dtBday = getPropertyValue(elmCard, 'bday', PropertyType.DATE, false, false);
	if (dtBday) {
	    arLines.push(vcardFold('BDAY:' + dtBday.toISO8601()));
	}
	
	
	// ADR (address)
	var arAdr = getPropertyValue(elmCard, 'adr', PropertyType.NODE, true, false);
	for (var iAdrIndex = 0; iAdrIndex < arAdr.length; iAdrIndex++) {
	    var elmAdr = arAdr[iAdrIndex];
	    var arType = getPropertyValue(elmAdr, 'type', PropertyType.STRING, true, true);
	    if (arType.length == 0) {
		arType = ['intl','postal','parcel','work']; // default adr types, see RFC 2426 section 3.2.1
	    }
	    var sPostOfficeBox = getPropertyValue(elmAdr, 'post-office-box', PropertyType.STRING, false, true);
	    var sExtendedAddress = getPropertyValue(elmAdr, 'extended-address', PropertyType.STRING, false, true);
	    var sStreetAddress = getPropertyValue(elmAdr, 'street-address', PropertyType.STRING, false, true);
	    var sLocality = getPropertyValue(elmAdr, 'locality', PropertyType.STRING, false, true);
	    var sRegion = getPropertyValue(elmAdr, 'region', PropertyType.STRING, false, true);
	    var sPostalCode = getPropertyValue(elmAdr, 'postal-code', PropertyType.STRING, false, true);
	    var sCountryName = getPropertyValue(elmAdr, 'country-name', PropertyType.STRING, false, true);
	    arLines.push(vcardFold('ADR;TYPE=' + arType.join(',') + ':' +
				   sPostOfficeBox + ';' +
				   sExtendedAddress + ';' +
				   sStreetAddress + ';' +
				   sLocality + ';' +
				   sRegion + ';' +
				   sPostalCode + ';' +
				   sCountryName));
	}
	
	// LABEL
	processTypeValue('label', ['intl','postal','parcel','work']);
	
	// TEL (phone number)
	processTypeValue('tel', ['voice']);
	
	// EMAIL
	processTypeValue('email', ['internet'], ['internet']);
	
	// MAILER
	processSingleString('mailer');
	
	// TZ (timezone)
	processSingleString('tz');

	// GEO (geographical information)
	var elmGeo = getPropertyValue(elmCard, 'geo', PropertyType.NODE, false, false);
	if (elmGeo.nodeName) {
	    var sLatitude = getPropertyValue(elmGeo, 'latitude', PropertyType.STRING, false, true);
	    var sLongitude = getPropertyValue(elmGeo, 'longitude', PropertyType.STRING, false, true);
	    arLines.push(vcardFold('GEO:' + sLatitude + ';' + sLongitude));
	}

	// TITLE
	processSingleString('title');

	// ROLE
	processSingleString('role');

	// LOGO
	processSingleURI('logo');

	// ORG (organization)
	var elmOrg = getPropertyValue(elmCard, 'org', PropertyType.NODE, false, false);
	if (elmOrg.nodeName) {
	    var sOrganizationName = getPropertyValue(elmOrg, 'organization-name', PropertyType.STRING, false, true);
	    if (!sOrganizationName) {
		// implied "organization-name" optimization
		// http://microformats.org/wiki/hcard#Implied_.22organization-name.22_Optimization
		var sOrganizationName = getPropertyValue(elmCard, 'org', PropertyType.STRING, false, true);
		if (sOrganizationName) {
		    arLines.push(vcardFold('ORG:' + sOrganizationName));
		}
	    } else {
		var arOrganizationUnit = getPropertyValue(elmOrg, 'organization-unit', PropertyType.STRING, true, true);
		arLines.push(vcardFold('ORG:' + sOrganizationName + ';' + arOrganizationUnit.join(';')));
	    }
	}

	// CATEGORY
	var arCategory = getPropertyValue(elmCard, 'category', PropertyType.STRING, true, true).concat(getPropertyValue(elmCard, 'categories', PropertyType.STRING, true, true));
	if (arCategory.length != 0) {
	    arLines.push(vcardFold('CATEGORIES:' + arCategory.join(',')));
	}

	// NOTE
	processSingleString('note');

	// REV
	processSingleString('rev');

	// SOUND
	processSingleURI('sound');

	// UID
	processSingleString('uid');

	// URL
	processSingleURI('url');

	// CLASS
	processSingleString('class');

	// KEY
	processSingleURI('key');

	// wrap the collected property lines; cards without any line are dropped
	if (arLines.length != 0) {
	    arLines = ['BEGIN:vCard','VERSION:3.0'].concat(arLines).concat(['END:vCard']);
	    sVCards += arLines.join('\n') + '\n';
	}
    }

    return sVCards.trim();
}

// Convert every hCard on the current page to vCard text and, if any text
// was produced, pass it to GM_log. sVCards stays at script scope because
// the unit testing harness further down compares it with the expected
// output.
var sVCards = findVCards();
if (sVCards) {
    GM_log(sVCards);
}

// ---------------------------------------------------------------------
// Everything below this line is part of the unit testing harness
// ---------------------------------------------------------------------
// Move on to the next page of the test suite: look up the first element
// with rel="next test" and, if it carries an href, navigate there after a
// 10 ms delay. Does nothing when no such element or href exists.
function nextTest() {
    var sQuery = "//" + sPrefix + "*[@rel='next test']";
    var oLookup = document.evaluate(sQuery, document, NSResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
    var elmLink = oLookup.singleNodeValue;
    var sTarget = elmLink ? elmLink.href : null;
    if (sTarget) {
        window.setTimeout(function() { window.location.href = sTarget; }, 10);
    }
}

// XPath name-test prefix: 'xhtml:' when the document element has a
// namespace, empty otherwise. Also read by nextTest().
var sPrefix = '';
if (document.documentElement.namespaceURI) {
    sPrefix = 'xhtml:';
}

// The page under test may point at its expected output through
// <link rel="alternate" type="text/v-card">.
var elmVCFLink = document.evaluate(
    "//" + sPrefix + "link[@rel='alternate'][@type='text/v-card']",
    document,
    NSResolver,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
).singleNodeValue;

if (!elmVCFLink || !elmVCFLink.href) {
    // nothing to compare against: go straight to the next test page
    nextTest();
} else {
    // fetch the expected vCard text and compare it (trimmed) with what
    // was parsed from this page; continue on a match, alert otherwise
    GM_xmlhttpRequest({
        method: 'GET',
        url: elmVCFLink.href,
        onload: function(oResponseDetails) {
            var sExpected = oResponseDetails.responseText.trim();
            var sFound = sVCards.trim();
            if (sExpected == sFound) {
                nextTest();
                return;
            }
            alert('-----found:\n\n' + sFound + '\n\n-----but expected:\n\n' + sExpected);
        }
    });
}
