Skip to content

Commit

Permalink
Added classifier code and test data
Browse files Browse the repository at this point in the history
  • Loading branch information
awaisathar committed May 26, 2016
1 parent 7c37960 commit be84fb3
Show file tree
Hide file tree
Showing 4 changed files with 7,379 additions and 8 deletions.
10 changes: 2 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.idea/

29 changes: 29 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the citation sentiment classifier. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Project coordinates; the groupId matches the Java package of the classifier. -->
<groupId>com.chaoticity.citationsentiment</groupId>
<artifactId>citationsentimentclassifier</artifactId>
<version>1.0-SNAPSHOT</version>

<!-- Repository the dependencies below are resolved from (the Maven Central URL, declared explicitly). -->
<repositories>
<repository>
<id>maven</id>
<url>https://repo1.maven.org/maven2</url>
</repository>
</repositories>
<dependencies>
<!-- Weka (stable line): the classifier imports its weka.core, weka.filters and weka.classifiers packages. -->
<dependency>
<groupId>nz.ac.waikato.cms.weka</groupId>
<artifactId>weka-stable</artifactId>
<version>3.6.6</version>
</dependency>
<!-- Weka LibSVM package: the classifier imports weka.classifiers.functions.LibSVM. -->
<dependency>
<groupId>nz.ac.waikato.cms.weka</groupId>
<artifactId>LibSVM</artifactId>
<version>1.0.3</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.chaoticity.citationsentiment;

import weka.classifiers.Evaluation;
import weka.classifiers.functions.LibSVM;
import weka.core.Instances;
import weka.core.converters.ConverterUtils;
import weka.core.tokenizers.NGramTokenizer;
import weka.core.tokenizers.WordTokenizer;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

import java.util.Random;

/**
* Code and data for citation sentiment classification reported in http://www.aclweb.org/anthology/P11-3015
* The file test.arff contains only the test set with dependency triplets generated with Stanford CoreNLP
* Full corpus available at http://www.cl.cam.ac.uk/~aa496/citation-sentiment-corpus
*
* @author Awais Athar
*/
public class CitationSentimentClassifier {


/**
* @param args the command line arguments
*/
public static void main(String[] args) throws Exception {
ConverterUtils.DataSource source = new ConverterUtils.DataSource("test.arff");
Instances data = source.getDataSet();

// Set class attribute
data.setClassIndex(data.attribute("@@class@@").index());

// delete unused attributes
data.deleteAttributeAt(1);
data.deleteAttributeAt(2);

// split dependencies on space
StringToWordVector unigramFilter = new StringToWordVector();
unigramFilter.setInputFormat(data);
unigramFilter.setIDFTransform(true);
unigramFilter.setAttributeIndices("3");
WordTokenizer whitespaceTokenizer = new WordTokenizer();
whitespaceTokenizer.setDelimiters(" ");
unigramFilter.setTokenizer(whitespaceTokenizer);
data = Filter.useFilter(data,unigramFilter);

// make trigrams from citation sentences
StringToWordVector trigramFilter = new StringToWordVector();
trigramFilter.setInputFormat(data);
trigramFilter.setIDFTransform(true);
trigramFilter.setAttributeIndices("2");
NGramTokenizer tokenizer = new NGramTokenizer();
tokenizer.setNGramMinSize(1);
tokenizer.setNGramMaxSize(3);
trigramFilter.setTokenizer(tokenizer);
data = Filter.useFilter(data,trigramFilter);

// Train and test 10x cross-validation
int folds = 10;
LibSVM svm = new LibSVM();
svm.setCost(1000);
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(svm, data, folds, new Random(1));
System.out.println(eval.toMatrixString());
System.out.println(eval.toSummaryString());
System.out.println(eval.toClassDetailsString());
}


}
Loading

0 comments on commit be84fb3

Please sign in to comment.