/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "libraries.h"
#include "utilities.h"
#include "referenceutils.h"
#include "directories.h"
#include "constants.h"
#include "gwrappers.h"
#include "generalconfig.h"
#include "books.h"
#include <fnmatch.h>
#include "projectutils.h"


ustring references_hidden_ones_get_filename (const ustring& project)
// Gives the name of the file that stores the hidden references of "project".
// The name is built from the projects directory, the project's name, and the
// fixed name "hidden-references".
{
  ustring filename = gw_build_filename (directories_get_projects (), project, "hidden-references");
  return filename;
}


vector<ustring> references_hidden_ones_load ()
// Reads the file with the hidden references of the project that is set in the
// general configuration, and returns the lines that were read.
{
  GeneralConfiguration genconfig (0);
  const ustring filename = references_hidden_ones_get_filename (genconfig.project());
  ReadText rt (filename, true, false);
  return rt.lines;
}


void references_hidden_ones_save (vector <ustring>& references)
// Writes "references", one per line, to the file with the hidden references 
// of the project that is set in the general configuration.
{
  GeneralConfiguration genconfig (0);
  const ustring filename = references_hidden_ones_get_filename (genconfig.project());
  write_lines (filename, references);
}


void references_hidden_ones_filter (vector <Reference>& references, vector <ustring>& comments)
// Takes "references" as input, and removes from them the ones that are supposed
// to be hidden. The entries of "comments" run parallel to "references", and
// each comment is removed together with its reference.
// A reference is hidden if the text that consists of its human readable form,
// a space, and its comment, is among the hidden references of the project.
// If there are fewer comments than references, the missing comments are taken
// to be empty. Comments beyond the last reference are dropped.
{
  // Load the hidden ones in a set for quicker search.
  vector<ustring> hidden_ones = references_hidden_ones_load ();
  set<ustring> hidden_set (hidden_ones.begin(), hidden_ones.end());
  // Move the references and comments into temporary containers, which leaves
  // the original containers empty, and copy them back to the original 
  // containers, one by one, if they are not hidden.
  vector<Reference> temp_references;
  vector<ustring> temp_comments;
  temp_references.swap (references);
  temp_comments.swap (comments);
  for (size_t i = 0; i < temp_references.size(); i++) {
    // Never read beyond the end of the comments.
    ustring comment;
    if (i < temp_comments.size()) comment = temp_comments[i];
    if (hidden_set.find (temp_references[i].human_readable ("") + " " + comment) == hidden_set.end()) {
      references.push_back (temp_references[i]);
      comments.push_back (comment);
    }
  }
}


bool text_contains_reference (const ustring& text)
/*
Tells whether the text looks like it contains a reference.
References such as Mt.5.5 or Mt.5:5 or John 10:5, and also ones without a 
bookname such as "10.5", all have a chapter/verse separator enclosed by digits.
The text is therefore said to contain a reference if, anywhere in it, there is
this sequence:
- a digit, 
- then one colon, comma or dot, 
- then a digit.
*/ 
{
  const char * shell_pattern = "*[0-9][:,.][0-9]*";
  int result = fnmatch (shell_pattern, text.c_str(), 0);
  if (result == 0) return true;
  return false;
}


vector <Reference> extract_abbreviated_references (const ustring& language, int book, const ustring& text)
/*
This routine extracts references from the text. 
It looks for abbreviated booknames in the text, each of them followed by 
chapter and verse information. Chapter.verse information that does not follow
a bookname is taken to refer to the currently opened book.

language: language of the abbreviations.
book: currently opened book
text: text to process.

Returns: vector of Reference objects.
*/
{
  // Location to store the references.
  vector <Reference> references;
  
  // If the length of the text is too short for a reference, bail out.
  if (text.length() < 3) return references;

  // Align the container with abbreviations to the container with book ids.
  vector <unsigned int> books = books_type_to_ids (btUnknown);
  vector <ustring> abbreviations;
  for (unsigned int i = 0; i < books.size(); i++) {
    abbreviations.push_back (books_id_to_abbreviation (language, books[i]));
  }

  // Collect all locations where a book is found to start.
  // A book without an abbreviation is skipped, because an empty search string
  // would be "found" at every position of the text.
  vector <size_t> startlocations;
  vector <size_t> booklengths;
  for (unsigned int i = 0; i < abbreviations.size(); i++) {
    const ustring& abbreviation = abbreviations[i];
    if (abbreviation.empty()) continue;
    size_t position = text.find (abbreviation);
    while (position != string::npos) {
      startlocations.push_back (position);
      booklengths.push_back (abbreviation.length());
      position = text.find (abbreviation, ++position);
    }
  }
  
  // At this point, it has been seen that a text like 3 John 3 was found
  // once at "3 John" and once again at "John", the latter of which is wrong.
  // We therefore need to go through our results, and remove ones that overlap,
  // and take the longest of them.
  // This is how to check on that:
  // - Take each of the start locations.
  // - Look through all the other ones whether there is one that 
  //   starts before this location, and ends at the same place.
  // - If there is such a one, do not store this particular starting location.
  // Note that after this step "booklengths" no longer runs parallel to 
  // "startlocations". It is not used anymore from here on.
  {
    vector <size_t> startlocations2;
    for (unsigned int i = 0; i < startlocations.size(); i++) {
      bool store_this_one = true;
      size_t startlocation = startlocations[i];
      size_t endlocation = startlocation + booklengths[i];
      for (unsigned int i2 = 0; i2 < startlocations.size(); i2++) {
        if ((startlocations[i2] < startlocation) 
         && ((startlocations[i2] + booklengths[i2]) == endlocation)) {
          store_this_one = false;
          break;
        }
      }
      if (store_this_one) 
        startlocations2.push_back (startlocation);
    }
    startlocations = startlocations2;
  }

  // Sort the starting locations.
  sort (startlocations.begin(), startlocations.end());
  
  // The above system fails to catch a chapter.verse reference without a book,
  // which is the first in a row. Look for this type: the first numeral that
  // comes before the first book, if any, starts a bit of text too.
  // NOTE(review): the search for the numeral skips the first 7 characters of
  // the text. The reason for that offset is not visible here; presumably it
  // skips a leading marker or caller - confirm.
  {
    size_t beginpos = 7;
    size_t endpos = text.length();
    if (!startlocations.empty()) endpos = startlocations[0];
    if (beginpos < endpos) {
      size_t numeral = text.find_first_of ("0123456789", beginpos);
      if (numeral != string::npos) {
        if (numeral < endpos) {
          startlocations.push_back (numeral);
          sort (startlocations.begin(), startlocations.end());
        }
      }      
    }    
  }

  // Collect all bits of text, aligned to the start locations.
  // Each bit runs from its start location up to the next one, or to the end
  // of the text, and is cut off at the first backslash in it.
  vector <ustring> textbits;
  for (unsigned int i = 0; i < startlocations.size(); i++) {
    size_t max = text.length();
    if ((i + 1) < startlocations.size())
      max = startlocations[i + 1];
    max -= startlocations[i];
    ustring bit = text.substr (startlocations[i], max);
    max = bit.find ("\\");
    if (max != string::npos)
      bit = bit.substr (0, max);
    bit = trim (bit);
    textbits.push_back (bit);
  }
  
  // Collect the book ids for each of bits of text, aligned to them.
  // The abbreviation is removed from the start of the bit. A bit that does 
  // not start with an abbreviation gets book id 0.
  // Books without an abbreviation are skipped, because an empty abbreviation
  // would match the start of any bit.
  vector <unsigned int> bookids;
  for (unsigned int i = 0; i < textbits.size(); i++) {
    unsigned int id = 0;
    for (unsigned int i2 = 0; i2 < abbreviations.size(); i2++) {
      if (abbreviations[i2].empty()) continue;
      if (textbits[i].find (abbreviations[i2]) == 0) {
        id = books[i2];
        textbits[i].erase (0, abbreviations[i2].length());
        textbits[i] = trim (textbits[i]);
      }      
    }
    bookids.push_back (id);
  }
  
  // We take it that crossreferences follow this system.
  // Gen. 10.1. One reference
  // Gen. 10.1; 11.2. Two references in Genesis.
  // Gen. 10.1. 11.2. Two references, one in Genesis, and the other in the 
  //                  current book.
  // Go through the bits of text to look for the last type of reference.
  // Every bit is split after each dot that is followed by a space. The first
  // part keeps the book id, and the following parts get book id 0.
  {
    vector <unsigned int> bookids2;
    vector <ustring> textbits2;
    for (unsigned int i = 0; i < textbits.size(); i++) {
      bool first_one_stored = false;
      size_t pos = textbits[i].find (". ");
      while (pos != string::npos) {
        ustring bit2 = textbits[i].substr (0, ++pos);
        textbits2.push_back (trim (bit2));
        if (first_one_stored) bookids2.push_back (0);
        else bookids2.push_back (bookids[i]);
        textbits[i].erase (0, pos);        
        textbits[i] = trim (textbits[i]);
        first_one_stored = true;
        // The part that was handled has been erased, so the search for the
        // next separator starts at the beginning again. Starting it at the 
        // old position would miss separators in the remaining text.
        pos = textbits[i].find (". ");
      }
      textbits[i] = trim (textbits[i]);
      textbits2.push_back (textbits[i]);
      if (first_one_stored) bookids2.push_back (0);
      else bookids2.push_back (bookids[i]);
    }
    textbits = textbits2;
    bookids = bookids2;
  }

  // Consider these examples.
  // Gen. 10.1; 11.2. Two references in Genesis.
  // 10.10; 11.11. Two references in the current book.
  // Go through the bits and pieces and separate these two references.
  // Every part keeps the book id of the bit it was split off from.
  {
    vector <unsigned int> bookids2;
    vector <ustring> textbits2;
    for (unsigned int i = 0; i < textbits.size(); i++) {
      size_t pos = textbits[i].find ("; ");
      while (pos != string::npos) {
        ustring bit2 = textbits[i].substr (0, ++pos);
        textbits2.push_back (trim (bit2));
        bookids2.push_back (bookids[i]);
        textbits[i].erase (0, pos);        
        textbits[i] = trim (textbits[i]);
        pos = textbits[i].find ("; ");
      }
      textbits[i] = trim (textbits[i]);
      textbits2.push_back (textbits[i]);
      bookids2.push_back (bookids[i]);
    }
    textbits = textbits2;
    bookids = bookids2;
  }

  // Consider this text: 
  // Exod. 10.2; 11.3. Chapter 2 and 12.4. See again 1 Chron. 11.12; 12.12. 13.13.
  // This divides into:
  // book - text bit
  //    0 - Chapter 2 and 12.4.
  //    0 - See again
  //   13 - 11.12;
  // Clean these up, that means the second example contains no reference, and 
  // should be thrown out, and the first example should have its book id set
  // to the currently opened book.
  for (unsigned int i = 0; i < bookids.size(); i++) {
    if ((bookids[i] == 0) && (textbits[i].length() >= 3)) {
      // Look for pattern numeral-numeral-numeral-dot-numeral, or 
      // numeral-numeral-dot-numeral or -numeral-dot-numeral, that is e.g.
      // 150.1 or 10.1 or 1.1. A colon is accepted in place of the dot.
      // The shortest match takes three characters, so every position that 
      // leaves at least three characters is tried, including the last one.
      for (unsigned int i2 = 0; i2 + 3 <= textbits[i].length(); i2++) {
        ustring bit = textbits[i].substr (i2, 10000);
        bool match = false;
        if (fnmatch ("[0-9][0-9][0-9][:.][0-9]*", bit.c_str(), 0) == 0) match = true;    
        if (fnmatch ("[0-9][0-9][:.][0-9]*", bit.c_str(), 0) == 0) match = true;    
        if (fnmatch ("[0-9][:.][0-9]*", bit.c_str(), 0) == 0) match = true;
        if (match) {
          // Throw out the text before the numbers, and assign the bit to the
          // currently opened book.
          textbits[i] = bit;
          bookids[i] = book;
          break;
        }
      }
    }
  }

  /* 
  We're now at a stage of the reference discovery that there is a pattern
  of numericals.
  Sample patterns:
  36.10 = chapter.verse
  24.1-4 = chapter.verse-verse.
  24.14,15 - chapter.verse,verse.
  24.14,15,18 - chapter.verse,verse,verse.
  other combinations are possible, using several commas to get a list of verses,
  and hyphens to get a range.
  */
  for (unsigned int i = 0; i < bookids.size(); i++) {
    
    // Skip book id zero.
    if (bookids[i] == 0) continue;

    // If the dot is there, and not at the end, then there is a chapter number. 
    // If the dot is not there, we take it that the chapter is one.
    // Extract that chapter number.
    // Skip this if the number is not there.
    // NOTE(review): only a dot counts as the separator here, while the 
    // patterns above accept a colon too - confirm whether that is intended.
    unsigned int chapternumber = 1;
    size_t dotposition = textbits[i].find (".");
    bool dotfound = (dotposition != string::npos) && (dotposition + 1 < textbits[i].length());
    if (dotfound) {
      ustring ufirst = number_in_string (textbits[i]);
      if (ufirst.empty()) continue;
      // Remove the chapter number and the one character that follows it.
      // This assumes that the number is right at the start of the bit.
      size_t length = MIN (textbits[i].length(), ufirst.length() + 1);
      textbits[i].erase (0, length);
      textbits[i] = trim (textbits[i]);
      chapternumber = convert_to_int (ufirst);
    }
    // Extract the rest of the number(s), which will be the verses.
    // If we have a case like "Jude 10", then the "10" should be taken as the
    // verse number. In such cases take chapter 1 and verse 10.
    // In case of e.g. Genesis 1, take all verses of Genesis 1.
    ustring verse = number_in_string (textbits[i]);
    ustring conjunction;
    unsigned int infiniteloop = 0;
    ustring previousverse;
    while (!verse.empty()) {
      if (conjunction == "-") {
        // A range: the verse it starts with has been stored already in the 
        // previous iteration, so store the ones following it, up to and
        // including the verse the range ends with.
        unsigned int start = convert_to_int (previousverse);
        start++;
        unsigned int end = convert_to_int (verse);
        for (unsigned int i2 = start; i2 <= end; i2++) {
          Reference reference (bookids[i], chapternumber, convert_to_string (i2));
          references.push_back (reference);
        }
      } else {
        if (dotfound) {
          Reference reference (bookids[i], chapternumber, verse);
          references.push_back (reference);
        } else {
          if (books_id_to_one_chapter (bookids[i])) {
            // Book with one chapter: the number is the verse.
            Reference reference (bookids[i], 1, verse);
            references.push_back (reference);
          } else {
            // The number is the chapter: store all verses that the project 
            // gives for that chapter.
            GeneralConfiguration genconfig (0);
            vector <ustring> verses = project_get_verses (genconfig.project(), bookids[i], convert_to_int (verse));
            for (unsigned int i2 = 0; i2 < verses.size(); i2++) {
              Reference reference (bookids[i], convert_to_int (verse), verses[i2]);
              references.push_back (reference);
            }
          }
        }
      }
      // Remove the number that was handled, and take the one character that
      // follows it as the conjunction between this number and the next.
      textbits[i].erase (0, verse.length());
      textbits[i] = trim (textbits[i]);
      conjunction.clear();
      if (!textbits[i].empty()) {
        conjunction = textbits[i].substr (0, 1);
        textbits[i].erase (0, 1);
        textbits[i] = trim (textbits[i]);
      }
      previousverse = verse;
      verse = number_in_string (textbits[i]);
      // Safety valve: never handle more than about a hundred numbers per bit.
      infiniteloop++;
      if (infiniteloop > 100) break;
    }

  }
  
  // Return the references found.  
  return references;
}
