diff --git a/.gitignore b/.gitignore
index 1cdc9f7..ea76a74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,3 @@
target/
-pom.xml.tag
-pom.xml.releaseBackup
-pom.xml.versionsBackup
-pom.xml.next
-release.properties
-dependency-reduced-pom.xml
-buildNumber.properties
-.mvn/timing.properties
+.idea/
+
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..845f5de
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.chaoticity.citationsentiment</groupId>
+    <artifactId>citationsentimentclassifier</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <repositories>
+        <repository>
+            <id>maven</id>
+            <url>https://repo1.maven.org/maven2</url>
+        </repository>
+    </repositories>
+    <dependencies>
+        <dependency>
+            <groupId>nz.ac.waikato.cms.weka</groupId>
+            <artifactId>weka-stable</artifactId>
+            <version>3.6.6</version>
+        </dependency>
+        <dependency>
+            <groupId>nz.ac.waikato.cms.weka</groupId>
+            <artifactId>LibSVM</artifactId>
+            <version>1.0.3</version>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/src/main/java/com/chaoticity/citationsentiment/CitationSentimentClassifier.java b/src/main/java/com/chaoticity/citationsentiment/CitationSentimentClassifier.java
new file mode 100644
index 0000000..f32a544
--- /dev/null
+++ b/src/main/java/com/chaoticity/citationsentiment/CitationSentimentClassifier.java
@@ -0,0 +1,75 @@
+/*
+ * Citation sentiment classification with Weka: TF-IDF n-gram and
+ * dependency-triplet features fed to a LibSVM classifier.
+ */
+package com.chaoticity.citationsentiment;
+
+import weka.classifiers.Evaluation;
+import weka.classifiers.functions.LibSVM;
+import weka.core.Instances;
+import weka.core.converters.ConverterUtils;
+import weka.core.tokenizers.NGramTokenizer;
+import weka.core.tokenizers.WordTokenizer;
+import weka.filters.Filter;
+import weka.filters.unsupervised.attribute.StringToWordVector;
+
+import java.util.Random;
+
+/**
+ * Code and data for citation sentiment classification reported in http://www.aclweb.org/anthology/P11-3015
+ * The file test.arff contains only the test set with dependency triplets generated with Stanford CoreNLP
+ * Full corpus available at http://www.cl.cam.ac.uk/~aa496/citation-sentiment-corpus
+ *
+ * @author Awais Athar
+ */
+public class CitationSentimentClassifier {
+    /**
+     * Runs 10-fold cross-validation of a LibSVM citation-sentiment
+     * classifier over TF-IDF vectors built from test.arff.
+     *
+     * @param args the command line arguments (unused)
+     */
+    public static void main(String[] args) throws Exception {
+        ConverterUtils.DataSource source = new ConverterUtils.DataSource("test.arff");
+        Instances data = source.getDataSet();
+
+        // Use the @@class@@ attribute (values o/n/p) as the prediction target.
+        data.setClassIndex(data.attribute("@@class@@").index());
+
+        // Delete @@id@@ (index 1) and, after indices shift down, @@author@@.
+        data.deleteAttributeAt(1);
+        data.deleteAttributeAt(2);
+
+        // Unigrams from the dependency triplets (attribute 3, 1-based), split
+        // on space. All options are set before setInputFormat so they apply.
+        StringToWordVector unigramFilter = new StringToWordVector();
+        unigramFilter.setIDFTransform(true);
+        unigramFilter.setAttributeIndices("3");
+        WordTokenizer whitespaceTokenizer = new WordTokenizer();
+        whitespaceTokenizer.setDelimiters(" ");
+        unigramFilter.setTokenizer(whitespaceTokenizer);
+        unigramFilter.setInputFormat(data);
+        data = Filter.useFilter(data, unigramFilter);
+
+        // Uni- to trigrams from the citation sentences (attribute 2).
+        StringToWordVector trigramFilter = new StringToWordVector();
+        trigramFilter.setIDFTransform(true);
+        trigramFilter.setAttributeIndices("2");
+        NGramTokenizer tokenizer = new NGramTokenizer();
+        tokenizer.setNGramMinSize(1);
+        tokenizer.setNGramMaxSize(3);
+        trigramFilter.setTokenizer(tokenizer);
+        trigramFilter.setInputFormat(data);
+        data = Filter.useFilter(data, trigramFilter);
+
+        // Train and test with 10-fold cross-validation; the fixed seed keeps
+        // the fold split reproducible across runs.
+        int folds = 10;
+        LibSVM svm = new LibSVM();
+        svm.setCost(1000);
+        Evaluation eval = new Evaluation(data);
+        eval.crossValidateModel(svm, data, folds, new Random(1));
+        System.out.println(eval.toMatrixString());
+        System.out.println(eval.toSummaryString());
+        System.out.println(eval.toClassDetailsString());
+    }
+}
diff --git a/test.arff b/test.arff
new file mode 100644
index 0000000..95400e9
--- /dev/null
+++ b/test.arff
@@ -0,0 +1,7273 @@
+@relation 'C:\\work\\data\\arff\\postwriteup\\aan3withauth.txt'
+
+@attribute @@class@@ {o,n,p}
+@attribute @@id@@ string
+@attribute @@sentence@@ string
+@attribute @@author@@ string
+@attribute @@dependencies@@ string
+
+@data
+o,736,'In it was observed that a significant percent of the queries made by a user in a search engine are associated to a repeated search ',Berger,'prep_observed_In pobj_In_ nsubjpass_observed_it auxpass_observed_was complm_associated_that det_percent_a amod_percent_significant nsubjpass_associated_percent prep_percent_of det_queries_the pobj_of_queries partmod_queries_made prep_made_by det_user_a pobj_by_user prep_user_in det_engine_a nn_engine_search pobj_in_engine auxpass_associated_are ccomp_observed_associated prep_associated_to det_search_a amod_search_repeated pobj_to_search '
+o,737,'Output sequence optimization Rather than basing classifications only on model parameters estimated from co-occurrences between input and output symbols employed for maximizing the likelihood of point-wise single-label predictions at the output level , classifier output may be augmented by an optimization over the output sequence as a whole using optimization techniques such as beam searching in the space of a conditional markov models output or hidden markov models ',Ratnaparkhi,'nn_optimization_Output nn_optimization_sequence dep_than_Rather advcl_augmented_than pcomp_than_basing dobj_basing_classifications dep_on_only prep_basing_on nn_parameters_model pobj_on_parameters partmod_parameters_estimated prep_estimated_from pobj_from_co-occurrences prep_co-occurrences_between nn_symbols_input cc_input_and conj_input_output pobj_between_symbols partmod_symbols_employed prep_employed_for pcomp_for_maximizing det_likelihood_the dobj_maximizing_likelihood prep_likelihood_of amod_predictions_point-wise amod_predictions_single-label pobj_of_predictions prep_maximizing_at det_level_the nn_level_output pobj_at_level nn_output_classifier nsubjpass_augmented_output aux_augmented_may auxpass_augmented_be dep_optimization_augmented prep_augmented_by det_optimization_an pobj_by_optimization prep_optimization_over det_sequence_the nn_sequence_output pobj_over_sequence prep_augmented_as det_whole_a pobj_as_whole partmod_whole_using nn_techniques_optimization dobj_using_techniques dep_as_such prep_techniques_as pobj_as_beam partmod_beam_searching prep_searching_in det_space_the pobj_in_space prep_space_of det__a amod__conditional amod__markov nn__models nn__output pobj_of_ cc__or amod_models_hidden amod_models_markov conj__models advmod_searching_ '
+o,738,'Dredze et al yielded the second highest score1 in the domain adaptation track ',Dredze,'nsubj_yielded_Dredze cc_Dredze_et conj_Dredze_al det_score1_the amod_score1_second amod_score1_highest dobj_yielded_score1 prep_yielded_in det_track_the nn_track_domain nn_track_adaptation pobj_in_track '
+o,739,'The IBM models search a version of permutation space with a one-to-many constraint ',Brown,'det_search_The nn_search_IBM nn_search_models nn_search_ det_version_a dep_search_version prep_version_of nn_space_permutation pobj_of_space prep_space_with det_constraint_a amod_constraint_one-to-many pobj_with_constraint '
+o,740,' propose the use of language models for sentiment analysis task and subjectivity extraction ',Pang,'advmod_propose_ det_use_the dobj_propose_use prep_use_of nn_models_language pobj_of_models prep_models_for nn_task_sentiment nn_task_analysis pobj_for_task cc_task_and amod_extraction_subjectivity conj_task_extraction '
+o,741,'In training process , we use GIZA + + 4 toolkit for word alignment in both translation directions , and apply grow-diag-final method to refine it ',Koehn,'prep_use_In nn_process_training pobj_In_process nsubj_use_we advmod_+_GIZA acomp_use_+ cc_+_+ dep_toolkit_4 conj_+_toolkit prep_+_for nn_alignment_word pobj_for_alignment prep_alignment_in det_directions_both nn_directions_translation pobj_in_directions cc_use_and conj_use_apply amod_method_grow-diag-final dobj_use_method aux_refine_to infmod_method_refine dobj_refine_it '
+o,742,'The models in the comparative study by did not include such features , and so , again for consistency of comparison , we experimentally verified that our maximum entropy model -LRB- a -RRB- consistently yielded higher scores than when the features were not used , and -LRB- b -RRB- consistently yielded higher scores than nave Bayes using the same features , in agreement with ',Klein,'det_models_The nsubj_include_models prep_models_in det_study_the amod_study_comparative pobj_in_study prep_study_by pobj_by_ aux_include_did neg_include_not amod_features_such dobj_include_features cc_include_and advmod_include_so advmod_verified_again prep_verified_for pobj_for_consistency prep_consistency_of pobj_of_comparison nsubj_verified_we advmod_verified_experimentally ccomp_include_verified complm_yielded_that poss_model_our amod_model_maximum amod_model_entropy nsubj_yielded_model dep_-LRB-_a advmod_yielded_consistently ccomp_verified_yielded amod_scores_higher dobj_yielded_scores prep_yielded_than advmod_used_when det_features_the nsubjpass_used_features auxpass_used_were neg_used_not pcomp_than_used cc_include_and dep_-RRB-_b nsubj_yielded_consistently conj_include_yielded amod_scores_higher dobj_yielded_scores prep_yielded_than amod_Bayes_nave pobj_than_Bayes dep_yielded_using det_features_the amod_features_same dobj_using_features prep_yielded_in pobj_in_agreement prep_yielded_with '
+o,743,' and et al ',Rosti,'cc__and conj__ cc__et conj__al '
+o,744,'We used the WordNet : : Similarity package to compute baseline scores for several existing measures , noting that one word pair was not processed in WS-353 because one of the words was missing from WordNet ',Pedersen,'nsubj_used_We det_WordNet_the dobj_used_WordNet nn_package_Similarity nsubj__package parataxis_used_ aux_compute_to xcomp__compute nn_scores_baseline dobj_compute_scores prep_compute_for amod_measures_several amod_measures_existing pobj_for_measures xcomp__noting complm_processed_that num_pair_one nn_pair_word nsubjpass_processed_pair auxpass_processed_was neg_processed_not ccomp_noting_processed prep_processed_in pobj_in_WS-353 mark_missing_because nsubj_missing_one prep_one_of det_words_the pobj_of_words aux_missing_was advcl_processed_missing prep_missing_from pobj_from_WordNet '
+o,745,'We use MER to tune the decoders parameters using a development data set ',Och,'nsubj_use_We nn__MER nsubj_tune_ aux_tune_to xcomp_use_tune det_parameters_the amod_parameters_decoders dobj_tune_parameters partmod_parameters_using det_set_a nn_set_development nn_set_data dobj_using_set '
+o,746,'The training set is extracted from TreeBank section 1518 , the development set , used in tuning parameters of the system , from section 20 , and the test set from section 21 ',Marcus,'det_set_The nn_set_training nsubjpass_extracted_set auxpass_extracted_is prep_extracted_from pobj_from_TreeBank num_TreeBank_ punct_TreeBank_section num_TreeBank_1518 det_development_the nsubj_set_development ccomp_extracted_set ccomp_set_used prep_used_in amod_parameters_tuning pobj_in_parameters prep_parameters_of det_system_the pobj_of_system prep_used_from pobj_from_section num_section_20 cc_extracted_and det_test_the nsubj_set_test conj_extracted_set prep_set_from pobj_from_section num_section_21 '
+o,747,'For non-local features , we adapt cube pruning from forest rescoring , since the situation here is analogous to machine translation decoding with integrated language models : we can view the scores of unit nonlocal features as the language model cost , computed on-the-fly when combining sub-constituents ',Huang,'prep_adapt_For amod_features_non-local pobj_For_features nsubj_adapt_we nn_pruning_cube dobj_adapt_pruning prep_adapt_from nn__forest amod__rescoring pobj_from_ mark_analogous_since det_situation_the nsubj_analogous_situation advmod_analogous_here cop_analogous_is dep__analogous prep_analogous_to nn_translation_machine pobj_to_translation partmod_translation_decoding prep_decoding_with amod_models_integrated nn_models_language pobj_with_models nsubj_view_we aux_view_can parataxis_adapt_view det_scores_the dobj_view_scores prep_scores_of nn_features_unit nn_features_nonlocal pobj_of_features prep_view_as det_cost_the nn_cost_language nn_cost_model pobj_as_cost amod_on-the-fly_computed appos_cost_on-the-fly advmod_combining_when advcl_view_combining dobj_combining_sub-constituents '
+o,748,'31 Agreement for Emotion Classes The kappa coefficient of agreement is a statistic adopted by the Computational Linguistics community as a standard measure for this purpose ',Carletta,'num_Agreement_31 nsubj_Classes_Agreement prep_Agreement_for pobj_for_Emotion det_coefficient_The nn_coefficient_kappa nsubj_statistic_coefficient prep_coefficient_of pobj_of_agreement cop_statistic_is det_statistic_a ccomp_Classes_statistic partmod_statistic_adopted prep_adopted_by det_community_the nn_community_Computational nn_community_Linguistics pobj_by_community prep_adopted_as det_measure_a amod_measure_standard pobj_as_measure prep_measure_for det_purpose_this pobj_for_purpose '
+p,749,'The averaged version of the perceptron , like the voted perceptron , reduces the effect of over-training ',Collins,'det_version_The amod_version_averaged nsubj_reduces_version prep_version_of det__the nn__perceptron pobj_of_ prep_version_like det__the amod__voted nn__perceptron pobj_like_ det_effect_the dobj_reduces_effect prep_effect_of pobj_of_over-training '
+o,750,'ITGs translate into simple -LRB- 2,2 -RRB- - BRCGs in the following way ; see for a definition of ITGs ',Wu,'dep_ITGs_translate prep_translate_into pobj_into_simple appos_simple_2,2 dep_simple_BRCGs prep_BRCGs_in det_way_the amod_way_following pobj_in_way parataxis_translate_see dobj_see_ prep_see_for det_definition_a pobj_for_definition prep_definition_of pobj_of_ITGs '
+o,751,'This may be because their system was not tuned using minimum error rate training ',Och,'nsubj_be_This aux_be_may mark_tuned_because poss_system_their nsubjpass_tuned_system auxpass_tuned_was neg_tuned_not advcl_be_tuned dep_tuned_using amod_training_minimum nn_training_error nn_training_rate dobj_using_training '
+o,752,'However , most of the existing models have been developed for English and trained on the Penn Treebank , which raises the question whether these models generalize to other languages , and to annotation schemes that differ from the Penn Treebank markup ',Marcus,'advmod_developed_However nsubjpass_developed_most prep_most_of det_models_the amod_models_existing pobj_of_models aux_developed_have auxpass_developed_been prep_developed_for pobj_for_English cc_developed_and conj_developed_trained prep_trained_on det__the nn__Penn nn__Treebank pobj_on_ nsubj_raises_which rcmod__raises det_question_the dobj_raises_question complm_generalize_whether det_models_these nsubj_generalize_models ccomp_raises_generalize prep_generalize_to amod_languages_other pobj_to_languages cc_on_and conj_on_to nn_schemes_annotation pobj_to_schemes nsubj_differ_that rcmod_schemes_differ prep_differ_from det_markup_the nn_markup_Penn nn_markup_Treebank pobj_from_markup '
+o,753,'Following , we used sections 0-18 of the Wall Street Journal -LRB- WSJ -RRB- corpus for training , sections 19-21 for development , and sections 22-24 for final evaluation ',Collins,'prep_used_Following pobj_Following_ nsubj_used_we dobj_used_sections num_sections_0-18 prep_sections_of det_corpus_the nn_Journal_Wall nn_Journal_Street nn_corpus_Journal abbrev_Journal_WSJ pobj_of_corpus prep_corpus_for pobj_for_training conj_sections_sections num_sections_19-21 prep_sections_for pobj_for_development cc_sections_and conj_sections_sections num_sections_22-24 prep_sections_for amod_evaluation_final pobj_for_evaluation '
+o,754,'In , the authors provide some sample subtrees resulting from such a 1,000-word clustering ',Brown,'prep_provide_In pobj_In_ det_authors_the nsubj_provide_authors det_sample_some nsubj_subtrees_sample ccomp_provide_subtrees xcomp_subtrees_resulting prep_resulting_from predet_clustering_such det_clustering_a amod_clustering_1,000-word pobj_from_clustering '
+o,755,'We took part the Multilingual Track of all ten languages provided by the CoNLL-2007 shared task organizer ',Marcus,'nsubj_took_We iobj_took_part det_Track_the nn_Track_Multilingual dobj_took_Track prep_Track_of quantmod_ten_all num_languages_ten pobj_of_languages partmod_languages_provided prep_provided_by det_organizer_the amod_organizer_CoNLL-2007 amod_organizer_shared nn_organizer_task pobj_by_organizer '
+o,756,'To set the weights , m , we carried out minimum error rate training using BLEU as the objective function ',Och,'aux_set_To dep_carried_set det_weights_the dobj_set_weights appos_weights_m nsubj_carried_we prt_carried_out amod__minimum nn__error nn__rate nn__training dobj_carried_ partmod__using nn__BLEU dobj_using_ prep__as det_function_the amod_function_objective pobj_as_function '
+o,757,'Use of global features for structured prediction problem has been explored by several NLP applications such as sequential labeling and dependency parsing with a great deal of success ',Finkel,'nsubjpass_explored_Use prep_Use_of amod_features_global pobj_of_features prep_features_for amod_problem_structured nn_problem_prediction pobj_for_problem aux_explored_has auxpass_explored_been prep_explored_by amod_applications_several nn_applications_NLP pobj_by_applications dep_as_such prep_applications_as amod__sequential nn__labeling pobj_as_ cc__and amod__dependency nn__parsing conj__ prep_explored_with det_deal_a amod_deal_great pobj_with_deal prep_deal_of pobj_of_success '
+o,758,'It is important to realize that the output of all mentioned processing steps is noisy and contains plenty of mistakes , since the data has huge variability in terms of quality , style , genres , domains etc , and domain adaptation for the NLP tasks involved is still an open problem ',Dredze,'nsubj_important_It cop_important_is aux_realize_to xcomp_important_realize complm_mentioned_that det_output_the nsubj_mentioned_output prep_output_of pobj_of_all ccomp_realize_mentioned nn_steps_processing nsubj_noisy_steps cop_noisy_is ccomp_mentioned_noisy cc_mentioned_and conj_mentioned_contains dobj_contains_plenty prep_plenty_of pobj_of_mistakes mark_has_since det_data_the nsubj_has_data advcl_important_has amod_variability_huge dobj_has_variability prep_variability_in pobj_in_terms prep_terms_of pobj_of_quality conj_quality_style conj_quality_genres nn_etc_domains conj_quality_etc cc_quality_and nn_adaptation_domain conj_quality_adaptation prep_adaptation_for det_NLP_the pobj_for_NLP nsubj_problem_tasks partmod_tasks_involved cop_problem_is advmod_problem_still det_problem_an amod_problem_open dep_important_problem '
+o,759,'They are also used for inducing alignments ',Wu,'nsubjpass_used_They auxpass_used_are advmod_used_also prep_used_for amod_alignments_inducing pobj_for_alignments '
+o,760,'In recent work , proposed a general framework for including morphological features in a phrase-based SMT system by factoring the representation of words into a vector of morphological features and allowing a phrase-based MT system to work on any of the factored representations , which is implemented in the Moses system ',Koehn,'prep_proposed_In amod_work_recent pobj_In_work nsubj_proposed_ det_framework_a amod_framework_general dobj_proposed_framework prep_proposed_for pcomp_for_including amod_features_morphological dobj_including_features prep_features_in det_system_a amod_system_phrase-based nn_system_SMT pobj_in_system prep_including_by pcomp_by_factoring det_representation_the dobj_factoring_representation prep_representation_of pobj_of_words prep_factoring_into det_vector_a pobj_into_vector prep_vector_of amod_features_morphological pobj_of_features cc_including_and conj_including_allowing det_system_a amod_system_phrase-based nn_system_MT dobj_allowing_system aux_work_to xcomp_allowing_work prep_work_on pobj_on_any prep_any_of det_representations_the amod_representations_factored pobj_of_representations nsubjpass_implemented_which auxpass_implemented_is rcmod_representations_implemented prep_implemented_in det_system_the nn_system_Moses pobj_in_system '
+o,761,'2 Architecture of the system The goal of statistical machine translation -LRB- SMT -RRB- is to produce a target sentence e from a source sentence f It is today common practice to use phrases as translation units and a log linear framework in order to introduce several models explaining the translation process : e ? ? = argmaxp -LRB- e f -RRB- = argmaxe -LCB- exp -LRB- summationdisplay i ihi -LRB- e , f -RRB- -RRB- -RCB- -LRB- 1 -RRB- The feature functions hi are the system models and the i weights are typically optimized to maximize a scoring function on a development set ',Koehn,'number_Architecture_2 nsubj_is_Architecture prep_Architecture_of det_system_the pobj_of_system det_goal_The dep_system_goal prep_goal_of amod_translation_statistical nn_translation_machine pobj_of_translation abbrev_Architecture_SMT aux_produce_to xcomp_is_produce det_sentence_a nn_sentence_target dobj_produce_sentence dep_from_e dep_practice_from det_sentence_a nn_sentence_source pobj_from_sentence dep_practice_f nsubj_practice_It cop_practice_is advmod_practice_today amod_practice_common dep_is_practice aux_use_to infmod_practice_use dobj_use_phrases mark__as nn_units_translation nsubj__units advcl_use_ cc__and det_log_a conj__log amod_framework_linear dobj_log_framework prep_framework_in pobj_in_order aux_introduce_to xcomp_log_introduce amod_models_several dobj_introduce_models dep_e_explaining det_process_the nn_process_translation dobj_explaining_process dep_use_e dep_use_= nsubj_=_argmaxp dep_f_e dep_-LRB-_f ccomp_=_= dobj_=_argmaxe dep_argmaxe_exp dep_argmaxe_summationdisplay dep_summationdisplay_i dep_summationdisplay_ihi dep_ihi_e appos_e_f dep_models_1 det_hi_The nn_hi_feature nn_hi_functions nsubj_models_hi cop_models_are det_models_the nn_models_system rcmod_argmaxe_models cc_models_and det_weights_the amod_weights_i nsubjpass_optimized_weights auxpass_optimized_are advmod_optimized_typically conj_models_optimized aux_maximize_to xcomp_optimized_maximize 
det_function_a amod_function_scoring dobj_maximize_function prep_=_on det__a nn__development nn__set pobj_on_ '
+o,762,'1 Introduction Sentiment analysis have been widely conducted in several domains such as movie reviews , product reviews , news and blog reviews ',Turney,'num_analysis_1 nn_analysis_Introduction nn_analysis_Sentiment nsubjpass_conducted_analysis aux_conducted_have auxpass_conducted_been advmod_conducted_widely prep_conducted_in amod_domains_several pobj_in_domains dep_as_such prep_domains_as nn_reviews_movie pobj_as_reviews nn_reviews_product conj_reviews_reviews conj_reviews_news cc_reviews_and amod_reviews_blog conj_reviews_reviews '
+o,763,'Their approaches include the use of a vector-based information retrieval technique \\/ bin\\/bash : line 1 : a : command not found Our do - mains are more varied , which may results in more recognition errors ',Chu-Carroll,'poss_approaches_Their nsubj_found_approaches rcmod_approaches_include det_use_the dobj_include_use prep_use_of det_information_a amod_information_vector-based pobj_of_information nn_\\/_retrieval nn_\\/_technique nn_\\/_ dep_include_\\/ dep_\\/_bin\\/bash dep_bin\\/bash_line num_line_1 dep_\\/_a dep_a_command neg_command_not nsubj_do_Our ccomp_found_do nsubjpass_varied_mains auxpass_varied_are advmod_varied_more parataxis_do_varied nsubj_results_which aux_results_may ccomp_do_results prep_results_in amod_errors_more nn_errors_recognition pobj_in_errors '
+o,764,'The corpus was aligned with GIZA + + and symmetrized with the grow-diag-finaland heuristic ',Koehn,'det_corpus_The nsubjpass_aligned_corpus auxpass_aligned_was prep_aligned_with nn_+_GIZA pobj_with_+ cc__+ amod_+_ cc__and conj__symmetrized prep_aligned_with det_heuristic_the amod_heuristic_grow-diag-finaland pobj_with_heuristic '
+p,765,'Since the use of cluster of machines is not always practical , showed a randomized data structure called Bloom filter , that can be used to construct space efficient language models 513 for SMT ',Talbot,'mark_practical_Since det_use_the nsubj_practical_use prep_use_of pobj_of_cluster prep_cluster_of pobj_of_machines cop_practical_is neg_practical_not advmod_practical_always advcl_showed_practical nsubj_showed_ det_structure_a amod_structure_randomized nn_structure_data nsubj_called_structure ccomp_showed_called nn_filter_Bloom dobj_called_filter nsubjpass_used_that aux_used_can auxpass_used_be rcmod_filter_used aux_construct_to purpcl_used_construct nn_models_space amod_models_efficient nn_models_language nsubj_513_models xcomp_construct_513 prep_513_for pobj_for_SMT '
+o,766,'BLEU was devised to provide automatic evaluation of MT output ',Papineni,'nn__BLEU nsubjpass_devised_ auxpass_devised_was aux_provide_to xcomp_devised_provide amod_evaluation_automatic dobj_provide_evaluation prep_evaluation_of nn_output_MT pobj_of_output '
+o,767,'Statistics in linguistics , Oxford : Basil Blackwell N Chinchor Evaluating message understanding systems : an analysis of the third Message Understanding Conference -LRB- MUC-3 1993 Computational Linguistics 19 409 -- 449 Chinchor , 1993 Chinchor , N , et al , 1993 ',Marcus,'dep_Chinchor_Statistics prep_Statistics_in pobj_in_linguistics appos_linguistics_Oxford nn_systems_Basil nn_systems_Blackwell nn_systems_ nn_systems_ nn_systems_ nn_systems_ nn_systems_ nn_systems_N nn_systems_Chinchor nn_systems_ nn_systems_ nn_systems_ nn_systems_Evaluating nn_systems_message nn_systems_understanding dep_Oxford_systems det_analysis_an dep_Oxford_analysis prep_analysis_of det__the nn__third nn_Conference_Message nn_Conference_Understanding dep_third_Conference nn__MUC-3 nn__ nn__ num__1993 nn__ nn__ nn__Computational nn__Linguistics nn__ nn__ num__19 pobj_of_ number_409_ num__409 number__449 dep_Oxford_ nn_Chinchor_ dep__Chinchor dep_Chinchor_1993 nn_Chinchor_ nn_Chinchor_ appos_Chinchor_N nn_al_et appos_Chinchor_al appos_Chinchor_1993 '
+o,768,'Note that it is straightforward to calculate these expected counts using a variant of the inside-outside algorithm applied to the dependency-parsing data structures for projective dependency structures , or the matrix-tree theorem for nonprojective dependency structures ',Smith,'complm_straightforward_that nsubj_straightforward_it cop_straightforward_is ccomp_Note_straightforward aux_calculate_to xcomp_straightforward_calculate det_counts_these amod_counts_expected dobj_calculate_counts partmod_counts_using det_variant_a dobj_using_variant prep_variant_of det__the amod__inside-outside nn__algorithm pobj_of_ partmod__applied prep_applied_to det__the amod__ amod__dependency-parsing nn__data nn__structures pobj_to_ prep__for amod_structures_projective amod_structures_dependency pobj_for_structures cc_counts_or det__the amod__matrix-tree nn__theorem conj_counts_ prep__for amod_structures_nonprojective amod_structures_dependency pobj_for_structures '
+o,769,'Following , we consider an anaphoric reference , NPi , correctly resolved if NPi and its closest antecedent are in the same coreference chain in the resulting partition ',Ponzetto,'prep_consider_Following pobj_Following_ nsubj_consider_we det_reference_an amod_reference_anaphoric nsubj_resolved_reference appos_reference_NPi advmod_resolved_correctly ccomp_consider_resolved mark_are_if nsubj_are_NPi cc_NPi_and poss_antecedent_its amod_antecedent_closest conj_NPi_antecedent advcl_resolved_are prep_are_in det_chain_the amod_chain_same nn_chain_coreference pobj_in_chain prep_chain_in det_partition_the amod_partition_resulting pobj_in_partition '
+o,770,'This algorithm adjusts the log-linear weights so that BLEU is maximized over a given development set ',Papineni,'det_algorithm_This nsubj_weights_algorithm dep_weights_adjusts det_weights_the amod_weights_log-linear advmod_maximized_so dep_maximized_that nn__BLEU nsubjpass_maximized_ auxpass_maximized_is dep_weights_maximized prep_maximized_over det_set_a amod_set_given nn_set_development pobj_over_set '
+o,771,'We discriminatively trained our parser in an on-line fashion using a variant of the voted perceptron ',Collins,'nsubj_trained_We advmod_trained_discriminatively poss_parser_our dobj_trained_parser prep_parser_in det_fashion_an amod_fashion_on-line pobj_in_fashion partmod_fashion_using det_variant_a dobj_using_variant prep_variant_of det_perceptron_the amod_perceptron_voted pobj_of_perceptron '
+o,772,'In fact , we found that it doesnt do so badly at all : the bitag HMM estimated by EM achieves a mean 1-to1 tagging accuracy of 40 \% , which is approximately the same as the 413 \% reported by for their sophisticated MRF model ',Haghighi,'prep_found_In pobj_In_fact nsubj_found_we complm_do_that nsubj_do_it aux_do_doesnt ccomp_found_do advmod_badly_so advmod_do_badly advmod_do_at dep_at_all det_HMM_the amod_HMM_bitag nsubj_achieves_HMM partmod_HMM_estimated prep_estimated_by pobj_by_EM parataxis_found_achieves det_accuracy_a amod_accuracy_mean amod_accuracy_1-to1 nn_accuracy_tagging dobj_achieves_accuracy prep_accuracy_of num_\%_40 pobj_of_\% nsubj_same_which cop_same_is advmod_same_approximately det_same_the rcmod_\%_same prep_same_as det_\%_the num_\%_413 pobj_as_\% partmod_\%_reported prep_reported_by pobj_by_ prep__for poss_model_their amod_model_sophisticated nn_model_MRF pobj_for_model '
+o,773,'2 Motivation and Prior Work While several authors have looked at the supervised adaptation case , there are less -LRB- and especially less successful -RRB- studies on semi-supervised domain adaptation ',Dredze,'dep_Motivation_2 cc_Motivation_and amod_Work_Prior conj_Motivation_Work mark_looked_While amod_authors_several nsubj_looked_authors aux_looked_have advcl_are_looked prep_looked_at det_case_the amod_case_supervised nn_case_adaptation pobj_at_case expl_are_there dep_Motivation_are acomp_are_less dep_-LRB-_and advmod_less_especially dep_-LRB-_less dep_less_successful dep_on_studies prep_are_on amod_adaptation_semi-supervised nn_adaptation_domain pobj_on_adaptation '
+o,774,'There are other types of variations for phrases ; for example , insertion , deletion or substitution of words , and permutation of words such as view point and point of view are such variations ',Berger,'expl_are_There amod_types_other nsubj_are_types prep_types_of pobj_of_variations prep_variations_for pobj_for_phrases prep_phrases_for pobj_for_example conj_example_insertion conj_example_deletion cc_example_or conj_example_substitution prep_phrases_of pobj_of_words cc_are_and nsubj_variations_permutation prep_permutation_of pobj_of_words dep_as_such prep_words_as nn_point_view pobj_as_point cc_point_and conj_point_point prep_point_of pobj_of_view cop_variations_are amod_variations_such conj_are_variations advmod_variations_ '
+o,775,'? ? search engines : uses the Altavista web browser , while we consider and combine the frequency information acquired from three web search engines ',Turney,'nn_engines_search nsubj_uses_engines nsubj_uses_ det_browser_the nn_browser_Altavista nn_browser_web dobj_uses_browser mark_consider_while nsubj_consider_we advcl_uses_consider cc_consider_and conj_consider_combine det_information_the nn_information_frequency dobj_consider_information partmod_information_acquired prep_acquired_from num_engines_three nn_engines_web nn_engines_search pobj_from_engines '
+o,776,'Many methods have been proposed to measure the co-occurrence relation between two words such as 2 , mutual information , t-test , and loglikelihood ',Dunning,'amod_methods_Many nsubjpass_proposed_methods aux_proposed_have auxpass_proposed_been aux_measure_to xcomp_proposed_measure det_relation_the amod_relation_co-occurrence dobj_measure_relation prep_relation_between num_words_two pobj_between_words dep_as_such prep_words_as num__2 pobj_as_ amod__mutual nn__information conj__ amod__t-test conj__ cc__and conj__loglikelihood '
+p,777,'1 Introduction The Maximum Entropy -LRB- ME -RRB- statistical framework has been successfully deployed in several NLP tasks ',Berger,'num__1 nn_Entropy_Introduction nn_Entropy_The nn_Entropy_Maximum measure_statistical_Entropy abbrev_Entropy_ME amod__statistical nn__framework nsubjpass_deployed_ aux_deployed_has auxpass_deployed_been advmod_deployed_successfully prep_deployed_in amod_tasks_several nn_tasks_NLP pobj_in_tasks '
+n,778,'Unfortunately , this is not the case for such widely used MT evaluation metrics as BLEU and NIST ',Papineni,'advmod_case_Unfortunately nsubj_case_this cop_case_is neg_case_not det_case_the prep_case_for amod_metrics_such advmod_used_widely amod_metrics_used nn_metrics_MT nn_metrics_evaluation pobj_for_metrics prep_case_as nn__BLEU pobj_as_ cc__and nn__NIST conj__ '
+o,779,'It has been argued that the reliability of a coding schema can be assessed only on the basis of judgments made by naive coders ',Carletta,'nsubjpass_argued_It aux_argued_has auxpass_argued_been complm_assessed_that det_reliability_the nsubjpass_assessed_reliability prep_reliability_of det_schema_a amod_schema_coding pobj_of_schema aux_assessed_can auxpass_assessed_be ccomp_argued_assessed advmod_assessed_only prep_assessed_on det_basis_the pobj_on_basis prep_basis_of pobj_of_judgments partmod_judgments_made prep_made_by amod_coders_naive pobj_by_coders '
+o,780,'to the pair-wise TER alignment described in ',Rosti,'dep_alignment_to det_alignment_the amod_alignment_pair-wise nn_alignment_TER partmod_alignment_described prep_described_in '
+o,781,'We obtain aligned parallel sentences and the phrase table after the training of Moses , which includes running GIZA + + , grow-diagonal-final symmetrization and phrase extraction ',Koehn,'nsubj_obtain_We ccomp_obtain_aligned amod_sentences_parallel dobj_aligned_sentences cc_sentences_and det_table_the nn_table_phrase conj_sentences_table prep_aligned_after det_training_the pobj_after_training prep_training_of pobj_of_Moses nsubj_includes_which rcmod_training_includes xcomp_includes_running nn__GIZA nn__+ nn__+ dobj_running_ amod_symmetrization_grow-diagonal-final conj__symmetrization cc__and nn_extraction_phrase conj__extraction '
+o,782,'From the above discussion , we can see that traditional tree sequence-based method uses single tree as translation input while the forestbased model uses single sub-tree as the basic translation unit that can only learn tree-to-string rules ',Galley,'prep_see_From det_discussion_the amod_discussion_above pobj_From_discussion nsubj_see_we aux_see_can complm_uses_that amod_method_traditional nn_method_tree nn_method_sequence-based nsubj_uses_method ccomp_see_uses amod_tree_single dobj_uses_tree prep_uses_as nn_input_translation pobj_as_input mark_uses_while det_model_the amod_model_forestbased nsubj_uses_model advcl_uses_uses amod_sub-tree_single dobj_uses_sub-tree prep_uses_as det_unit_the amod_unit_basic nn_unit_translation pobj_as_unit nsubj_learn_that aux_learn_can advmod_learn_only rcmod_unit_learn amod_rules_tree-to-string amod_rules_ dobj_learn_rules '
+o,783,'Our baseline method for ambiguity resolution is the Collins parser as implemented by Bikel ',Collins,'poss_method_Our amod_method_baseline nsubj_parser_method prep_method_for amod_resolution_ambiguity pobj_for_resolution cop_parser_is det_parser_the nn_parser_Collins mark_implemented_as advcl_parser_implemented prep_implemented_by pobj_by_Bikel '
+o,784,'We then built separate English-to-Spanish and Spanish-to-English directed word alignments using IBM model 4 , combined them using the intersect + grow heuristic , and extracted phrase-level translation pairs of maximum length 7 using the alignment template approach ',Och,'nsubj_built_We advmod_built_then dobj_built_separate dep_separate_English-to-Spanish cc_separate_and amod_alignments_Spanish-to-English amod_alignments_directed nn_alignments_word conj_separate_alignments xcomp_built_using nn_model_IBM nsubj__model num__4 xcomp_using_ partmod__combined nsubj_using_them dep_combined_using det__the amod__intersect cc_intersect_+ conj_intersect_grow amod__heuristic dobj_using_ cc_built_and conj_built_extracted amod_pairs_phrase-level nn_pairs_translation dobj_extracted_pairs prep_pairs_of nn_length_maximum pobj_of_length num_length_7 xcomp_built_using det_approach_the amod_approach_alignment nn_approach_template dobj_using_approach '
+o,785,'For instance , both Pang and Lee and consider the thumbs up\\/thumbs down decision : is a film review positive or negative ? ',Turney,'prep_consider_For pobj_For_instance preconj__both nn__Pang cc_Pang_and conj_Pang_Lee nsubj_consider_ cc__and nn__ conj__ det_thumbs_the nsubj_up\\/thumbs_thumbs ccomp_consider_up\\/thumbs prt_up\\/thumbs_down dobj_up\\/thumbs_decision cop_review_is det_review_a nn_review_film dep_decision_review amod_review_positive cc_positive_or conj_positive_negative '
+o,786,'Binarizing the syntax trees for syntax-based machine translation is similar in spirit to generalizing parsing models via markovization ',Collins,'det_trees_the nn_trees_syntax nsubj_similar_trees prep_trees_for amod_translation_syntax-based nn_translation_machine pobj_for_translation cop_similar_is ccomp_Binarizing_similar prep_similar_in pobj_in_spirit aux_generalizing_to xcomp_similar_generalizing amod_models_parsing dobj_generalizing_models prep_models_via pobj_via_markovization '
+o,787,'4 Options from the Translation Table Phrase-based statistical machine translation methods acquire their translation knowledge in form of large phrase translation tables automatically from large amounts of translated texts ',Koehn,'num_Options_4 nsubj_Table_Options prep_Options_from det_Translation_the pobj_from_Translation amod_methods_Phrase-based amod_methods_statistical nn_methods_machine nn_methods_translation nsubj_acquire_methods ccomp_Table_acquire poss_knowledge_their nn_knowledge_translation dobj_acquire_knowledge prep_acquire_in pobj_in_form prep_form_of amod_tables_large nn_tables_phrase nn_tables_translation pobj_of_tables advmod_acquire_automatically prep_acquire_from amod_amounts_large pobj_from_amounts prep_amounts_of amod_texts_translated pobj_of_texts '
+o,788,'For example , collected reviews from a movie database and rated them as positive , negative , or neutral based on the rating -LRB- eg , number of stars -RRB- given by the reviewer ',Pang,'prep_collected_For pobj_For_example nsubj_collected_ dobj_collected_reviews prep_collected_from det_database_a nn_database_movie pobj_from_database cc_collected_and conj_collected_rated dobj_rated_them prep_rated_as pobj_as_positive conj_positive_negative cc_positive_or conj_positive_neutral prep_rated_based dep_based_on det_rating_the pobj_on_rating dep_rating_eg appos_eg_number prep_number_of pobj_of_stars prep_rating_given dep_given_by det_reviewer_the pobj_by_reviewer '
+o,789,'The earliest work in this direction are those of , , , , and ',Weeds,'det_work_The amod_work_earliest nsubj_those_work prep_work_in det_direction_this pobj_in_direction cop_those_are prep_those_of nn_and_ appos_and_ appos_and_ appos_and_ appos_and_ pobj_of_and '
+o,790,' established that it is important to tune -LRB- the trade-off between Precision and Recall -RRB- to maximize performance ',Fraser,'nsubj_established_ complm_important_that nsubj_important_it cop_important_is ccomp_established_important prep_important_to pobj_to_tune det_trade-off_the dep_tune_trade-off prep_trade-off_between pobj_between_Precision cc_Precision_and conj_Precision_Recall aux_maximize_to infmod_tune_maximize dobj_maximize_performance '
+o,791,'Moreover , under this view , SMT becomes quite similar to sequential natural language annotation problems such as part-of-speech tagging and shallow parsing , and the novel training algorithm presented in this paper is actually most similar to work on training algorithms presented for these task , eg the on-line training algorithm presented in and the perceptron training algorithm presented in ',Collins,'advmod_similar_Moreover prep_similar_under det_view_this pobj_under_view nsubj_similar_SMT cop_similar_becomes advmod_similar_quite aux_sequential_to xcomp_similar_sequential amod_problems_natural nn_problems_language nn_problems_annotation dobj_sequential_problems dep_as_such prep_problems_as amod_tagging_part-of-speech pobj_as_tagging cc_tagging_and nn_parsing_shallow conj_tagging_parsing cc_similar_and det_algorithm_the amod_algorithm_novel nn_algorithm_training nsubj_similar_algorithm partmod_algorithm_presented prep_presented_in det_paper_this pobj_in_paper cop_similar_is advmod_similar_actually advmod_similar_most conj_similar_similar aux_work_to xcomp_similar_work prep_work_on nn_algorithms_training pobj_on_algorithms partmod_algorithms_presented prep_presented_for det_task_these pobj_for_task partmod_task_eg det_algorithm_the amod_algorithm_on-line nn_algorithm_training dobj_eg_algorithm partmod_algorithm_presented prep_presented_in pobj_in_ cc_similar_and det_algorithm_the nn_algorithm_perceptron nn_algorithm_training nsubj_presented_algorithm conj_similar_presented prt_presented_in '
+o,792,'Feature weights vector are trained discriminatively in concert with the language model weight to maximize the BLEU automatic evaluation metric via Minimum Error Rate Training -LRB- MERT -RRB- ',Och,'amod_vector_Feature nn_vector_weights nsubjpass_trained_vector auxpass_trained_are advmod_trained_discriminatively prep_trained_in pobj_in_concert prep_trained_with det_weight_the nn_weight_language nn_weight_model pobj_with_weight aux_maximize_to xcomp_trained_maximize det_metric_the amod_metric_BLEU amod_metric_ amod_metric_automatic nn_metric_evaluation dobj_maximize_metric prep_metric_via nn_Training_Minimum nn_Training_Error nn_Training_Rate pobj_via_Training abbrev_Training_MERT '
+o,793,' and Wiebe -LRB- 2000 -RRB- focused on learning adjectives and adjectival phrases and Wiebe et al ',Turney,'nsubj_focused_ cc__and conj__Wiebe appos__2000 prep_focused_on pcomp_on_learning dobj_learning_adjectives cc_adjectives_and amod_phrases_adjectival conj_adjectives_phrases cc_phrases_and conj_phrases_Wiebe cc_Wiebe_et conj_Wiebe_al '
+p,794,'To achieve efficient parsing , we use a beam search strategy like the previous methods ',Collins,'aux_achieve_To dep_use_achieve amod_parsing_efficient dobj_achieve_parsing nsubj_use_we det_strategy_a nn_strategy_beam nn_strategy_search dobj_use_strategy prep_strategy_like det_methods_the amod_methods_previous pobj_like_methods '
+o,795,'A number of alignment techniques have been proposed , varying from statistical methods to lexical methods ',Brown,'det_number_A nsubjpass_proposed_number prep_number_of amod_techniques_alignment pobj_of_techniques aux_proposed_have auxpass_proposed_been dobj_proposed_varying prep_varying_from amod_methods_statistical pobj_from_methods partmod_methods_ prep__to amod__lexical nn_