/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.temporal.ae.feature;

import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;

import java.util.*;

/**
 * Feature extractor that describes the verb closest to a focus annotation.
 *
 * <p>A word token counts as a verb when its part-of-speech tag starts with {@code "VB"}.
 * Distance is the absolute difference between the begin offset of the token and the begin
 * offset of the focus annotation. For the closest verb two features are produced, in this order:
 * <ol>
 *   <li>{@code "ClosestVerb_token"} - the covered text of the verb token;</li>
 *   <li>{@code "ClosestVerb"} - the part-of-speech tag of the verb token.</li>
 * </ol>
 *
 * <p>Tokens without a part-of-speech tag are ignored rather than causing a
 * {@link NullPointerException}.
 */
public class ClosestVerbExtractor implements FeatureExtractor1 {

  /** Prefix shared by the part-of-speech tags that this extractor treats as verbs. */
  private static final String VERB_POS_PREFIX = "VB";

  /** Base name of the emitted features; the token feature appends {@code "_token"}. */
  private final String name;

  /** Creates an extractor whose features are named {@code "ClosestVerb"} and {@code "ClosestVerb_token"}. */
  public ClosestVerbExtractor() {
    this.name = "ClosestVerb";
  }

  /**
   * Extracts closest-verb features for an event mention, looking at the word tokens of every
   * sentence that covers it.
   *
   * <p>When two verbs are equally distant from the annotation, the one that comes later in the
   * iteration order of {@code JCasUtil.selectCovered} is used. This matches the previous
   * map-based implementation, in which a later token replaced an earlier one with the same distance.
   *
   * @param view       the CAS view holding the event, sentence and word-token annotations
   * @param annotation the focus annotation; only annotations indexed as {@link EventMention}
   *                   have covering sentences looked up, anything else yields no features
   * @return a mutable list with two features per covering sentence that contains a verb;
   *         empty when there is no covering sentence or no verb
   * @throws CleartkExtractorException declared by the extractor interface; not thrown here
   */
  @Override
  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
    final List<Feature> features = new ArrayList<>();

    // 1. Find the sentence(s) covering the target event. Map.get accepts any object, so no
    //    cast is required and an annotation that is not an indexed event simply maps to null.
    Map<EventMention, List<Sentence>> coveringMap =
        JCasUtil.indexCovering(view, EventMention.class, Sentence.class);
    Collection<Sentence> sentList = coveringMap.get(annotation);
    if (sentList == null || sentList.isEmpty()) {
      return features;
    }

    // 2. For each covering sentence, emit features for the verb nearest to the event.
    for (Sentence sent : sentList) {
      final WordToken closestVerb = findClosestVerb(
          JCasUtil.selectCovered(view, WordToken.class, sent), annotation.getBegin(), true);
      addVerbFeatures(features, closestVerb);
    }
    return features;
  }

  /**
   * Extracts closest-verb features from an already collected set of word tokens.
   *
   * <p>When two verbs are equally distant from the annotation, the one that comes first in the
   * iteration order of {@code words} is used.
   *
   * @param view       the CAS view; not used by this overload
   * @param annotation the focus annotation whose begin offset distances are measured from
   * @param words      candidate word tokens; {@code null} is treated as empty
   * @return a mutable list holding the token feature and the part-of-speech feature of the
   *         closest verb, or an empty list when {@code words} contains no verb
   * @throws CleartkExtractorException declared for symmetry with the interface method; not thrown here
   */
  public List<Feature> extract( final JCas view, final Annotation annotation, final Collection<WordToken> words )
      throws CleartkExtractorException {
    final List<Feature> features = new ArrayList<>();
    if ( words == null ) {
      return features;
    }
    addVerbFeatures( features, findClosestVerb( words, annotation.getBegin(), false ) );
    return features;
  }

  /**
   * Finds the verb token whose begin offset is nearest to the given offset in a single pass.
   *
   * @param words            candidate tokens; null elements and tokens without a part-of-speech tag are skipped
   * @param annotationBegin  begin offset of the focus annotation
   * @param preferLaterOnTie when {@code true} a later token replaces an earlier one at the same
   *                         distance; when {@code false} the earlier token is kept
   * @return the closest verb token, or {@code null} when there is none
   */
  private static WordToken findClosestVerb( final Collection<WordToken> words,
                                            final int annotationBegin,
                                            final boolean preferLaterOnTie ) {
    WordToken closestToken = null;
    int closestDistance = Integer.MAX_VALUE;
    for ( WordToken wt : words ) {
      if ( wt == null ) {
        continue;
      }
      final String pos = wt.getPartOfSpeech();
      // An untagged token has a null part of speech and cannot be classified as a verb.
      if ( pos == null || !pos.startsWith( VERB_POS_PREFIX ) ) {
        continue;
      }
      final int distance = Math.abs( wt.getBegin() - annotationBegin );
      final boolean closer = preferLaterOnTie
          ? distance <= closestDistance
          : distance < closestDistance;
      if ( closestToken == null || closer ) {
        closestDistance = distance;
        closestToken = wt;
      }
    }
    return closestToken;
  }

  /**
   * Appends the token-text feature followed by the part-of-speech feature for the given verb.
   *
   * @param features the list to append to
   * @param verb     the closest verb, or {@code null} in which case nothing is appended
   */
  private void addVerbFeatures( final List<Feature> features, final WordToken verb ) {
    if ( verb == null ) {
      return;
    }
    features.add( new Feature( this.name + "_token", verb.getCoveredText() ) );
    features.add( new Feature( this.name, verb.getPartOfSpeech() ) );
  }

}
