#!/usr/bin/env texlua
kpse.set_program_name("luatex")
-- ctanbib -- export ctan entries to bib format
-- Copyright: Michal Hoftich <michal.h21@gmail.com> (2014-2018)
--
-- This work may be distributed and/or modified under the
-- conditions of the LaTeX Project Public License, either version 1.3
-- of this license or (at your option) any later version.
-- The latest version of this license is in
--   http://www.latex-project.org/lppl.txt
-- and version 1.3 or later is part of all distributions of LaTeX
-- version 2005/12/01 or later.
-- 
-- This work has the LPPL maintenance status `maintained'.
-- 
-- The Current Maintainer of this work is Michal Hoftich

-- entry type used in the output, and whether the url taken from the
-- package record is kept for the @ctan type
local bibtype = "manual"
local pkgurl = false

do
  local usage = [[ctanbib - convert ctan package information to bibtex format
Usage:
ctanbib [options] <package name>
Available options:
  -c,--ctan       Use @ctan type instead of @manual and long url
  -C,--CTAN       Use @ctan type and short ... /pkg/url
  -h,--help       Print this message
  -v,--version    Print version info
  ]]
  -- only the first command line argument is checked for an option
  local option = arg[1]
  if #arg < 1 or option == "--help" or option == "-h" then
    print(usage)
    os.exit(0)
  end
  if option == "--version" or option == "-v" then
    print "ctanbib version 0.1e"
    os.exit(0)
  end
  if option == "--ctan" or option == "-c" then
    bibtype = "ctan"
    table.remove(arg, 1)
  elseif option == "--CTAN" or option == "-C" then
    bibtype = "ctan"
    pkgurl = true
    table.remove(arg, 1)
  end
end

-- a recognized option has been removed above, so the package name is
-- expected in the first position
local pkgname = arg[1]

if not pkgname then
  print "[ctanbib] Error: missing package name"
  os.exit(1)
end

-- address of the XML record of the requested package
-- NOTE(review): the author-name query presumably asks CTAN to include
-- author names in the record -- confirm against the CTAN API docs
local url = "https://ctan.org/xml/2.0/pkg/" .. pkgname .. "?author-name=true"
-- address of the XML list of CTAN authors, used when the package record
-- lacks author names
local authors_url = "https://ctan.org/xml/2.0/authors"

-- change that for different title scheme
-- (%s is replaced by the escaped package title)
local titleformat = "The %s package"

-- template of the printed entry: every $name placeholder is filled from
-- the field of the same name, and the leading @manual is replaced by the
-- selected entry type before printing
local bibtexformat = [[
@manual{$package,
title = {$title},
subtitle = {$subtitle},
author = {$author},
url = {$url},
urldate = {$urldate}, 
date = {$date},
version = {$version}
}
]]

local dom = require('luaxml-domobject')

--- Download an XML document with curl and parse it.
-- @param url address to fetch; only letters, digits and the characters
--   - . _ ~ : / ? = & + are accepted, because the address is passed to a shell
-- @return DOM object produced by dom.parse, or false when the address is
--   rejected, curl cannot be started, or nothing was downloaded
local load_xml =  function(url)
  -- the address contains a command line argument and ends up in a shell
  -- command, so refuse anything that could break out of the quotes
  if type(url) ~= "string" or url:find("[^%w%-%._~:/%?=&%+]") then
    return false
  end
  -- local command = io.popen('wget -qO- "' .. url .. '"', "r")
  -- the quotes keep the shell from globbing on "?" or splitting on "&"
  local command = io.popen('curl -sS "' .. url .. '"', "r")
  if not command then return false end

  local info = command:read("*all")
  command:close()

  if not info or string.len(info) == 0 then
    return false
  end
  return dom.parse(info)
end

--- Escape characters that would break a brace-delimited BibTeX field.
-- `$`, `{` and `}` get a leading backslash; a backslash itself is replaced
-- by the \textbackslash macro.
-- @param a text to escape; nil is treated as an empty string
-- @return the escaped text (exactly one value)
local bibtex_escape = function(a)
  local text = a or ""
  local escaped = text:gsub("([%$%{%}%\\])", function(x)
    if x == "\\" then return "\\textbackslash  " end
    return '\\'..x
  end)
  -- gsub also returns the number of substitutions; hand back only the text
  return escaped
end

-- we need to use this method temporarily because of a bug in
-- CTAN API
local author_list
--- Fetch the CTAN author list and index it by author ID.
-- The download is attempted at most once; later calls reuse the cached table.
-- @return table mapping author ID to its <author> element; empty when the
--   download failed
local fetch_author_list = function()
  if author_list then return author_list end
  author_list = {}
  local authors = load_xml(authors_url)
  -- load_xml returns false on failure; keep the empty list in that case
  if not authors then return author_list end
  for _, author in ipairs(authors:query_selector("author")) do
    -- save all authors under their ID; an element without an ID cannot be
    -- looked up, and a nil key would raise an error
    local id = author:get_attribute("id")
    if id then author_list[id] = author end
  end
  return author_list
end

--- Format the name stored on an author element.
-- @param author element providing get_attribute(); may be nil
-- @return "Family, Given" when both name parts are present, "{Name}" when
--   only one part is present (the author is an organization), or nil when
--   no name is available
local process_author = function(author)
  if not author then return nil end
  local current = {}
  for _, key in ipairs({"familyname", "givenname"}) do
    local part = author:get_attribute(key)
    -- an empty attribute carries no name; keeping it would yield "Name, "
    if part and part ~= "" then
      current[#current+1] = part
    end
  end
  -- the author is an organization
  if #current == 1 then
    return "{" .. current[1] .. "}"
  elseif #current > 1 then
    return table.concat(current, ", ")
  end
  return nil -- no author
end

--- Get the formatted name of one author reference.
-- @param author <authorref> element of the package record
-- @return formatted author name, or "{}" when it cannot be determined
local get_author = function(author)
  local name = process_author(author)
  if name then return name end
  -- if the package XML doesn't contain author name,
  -- we need to fetch the authors list from CTAN and find
  -- it here
  local id = author:get_attribute("id")
  -- without an ID there is nothing to look up, so skip the download
  local known = id and fetch_author_list()[id]
  -- the ID may be absent from the list; process_author must not be
  -- handed a missing element, so return the empty group right away
  if not known then return "{}" end
  return process_author(known) or "{}"
end

--- Build the author field from a list of author references.
-- @param a sequence of <authorref> elements
-- @return formatted names joined with " and "; an empty string when the
--   sequence is empty
local get_authors = function(a)
  local names = {}
  for _, authorref in ipairs(a) do
    names[#names + 1] = get_author(authorref)
  end
  local separator = " and "
  return table.concat(names, separator)
end

--- Build the title of the bibliography entry.
-- Uses the text of the first <name> element with its first character
-- upper-cased, or the requested package name (unchanged) when the record
-- has no such element. The text is escaped and inserted into titleformat.
-- @param record parsed package record
-- @return the formatted title
local get_title = function(record)
  local title = record:query_selector("name")[1]
  if title then
    title = title:get_text()
    -- match one whole UTF-8 sequence (lead byte plus continuation bytes);
    -- a plain "." would pass only the first byte of a multi-byte
    -- character to unicode.utf8.upper, leaving it in lower case
    title = title:gsub("^[\1-\127\194-\244][\128-\191]*", function(a)
      return unicode.utf8.upper(a)
    end)
  else
    title = pkgname
  end
  return string.format(titleformat, bibtex_escape(title))
end


--- Pick the url recorded for the package.
-- @param record parsed package record
-- @return href attribute of the first <home> element, or the CTAN package
--   page of the requested package when there is no <home> element
local get_url = function(record)
  local homepage = record:query_selector("home")[1]
  if not homepage then
    return "https://ctan.org/pkg/"..pkgname
  end
  return homepage:get_attribute("href")
end

--- Read the package caption.
-- @param record parsed package record
-- @return result of bibtex_escape applied to the text of the first
--   <caption> element, or nil when the record has no caption
local get_caption = function(record)
  local node = record:query_selector("caption")[1]
  if not node then
    return nil
  end
  return bibtex_escape(node:get_text())
end

--- Read version information from the package record.
-- @param record parsed package record
-- @return the "number" and "date" attributes of the first <version>
--   element; nothing at all when the record has no <version> element
local get_version = function(record)
  local node = record:query_selector("version")[1]
  if not node then return end
  local number = node:get_attribute("number")
  local date = node:get_attribute("date")
  return number, date
end

--- Get the CTAN path of the package.
-- @param record parsed package record
-- @return "path" attribute of the first <ctan> element; the result of
--   get_url when the record has no <ctan> element
local ctan_url = function(record)
  local node = record:query_selector("ctan")[1]
  if node then
    local location = node:get_attribute("path")
    return location
  end
  -- some packages don't contain the CTAN path
  return get_url(record)
end


--- Fill the placeholders of a template.
-- Each `$name` placeholder (lower-case ASCII letters only) is replaced by
-- records[name]; a placeholder without a value becomes an empty string.
-- @param template text containing placeholders
-- @param records table mapping placeholder names to their values
-- @return the filled text, followed by the number of replaced placeholders
local compile = function(template, records)
  local function lookup(key)
    local value = records[key]
    if value then return value end
    return ""
  end
  return template:gsub("$([a-z]+)", lookup)
end

-- download and parse the package record
local record = load_xml(url)

if not record then
  print("Cannot find entry for package "..pkgname)
  os.exit(1)
end

-- values substituted for the $placeholders of bibtexformat
local fields = {}

fields.author = get_authors(record:query_selector("authorref"))
fields.package = pkgname
fields.title = get_title(record)
fields.subtitle = get_caption(record)
fields.url = get_url(record)
-- use the CTAN path as url for the CTAN type
local wants_ctan_path = (bibtype == "ctan") and not pkgurl
if wants_ctan_path then
  fields.url = ctan_url(record)
end
local version, date = get_version(record)
fields.version = version
fields.date = date
fields.urldate = os.date("%Y-%m-%d")

local output = compile(bibtexformat, fields)
-- update the bibliography type
local entry_type = "@" .. bibtype
output = output:gsub("^@manual", entry_type)

print(output)
