-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7c37960
commit be84fb3
Showing
4 changed files
with
7,379 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,3 @@ | ||
target/ | ||
pom.xml.tag | ||
pom.xml.releaseBackup | ||
pom.xml.versionsBackup | ||
pom.xml.next | ||
release.properties | ||
dependency-reduced-pom.xml | ||
buildNumber.properties | ||
.mvn/timing.properties | ||
.idea/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<groupId>com.chaoticity.citationsentiment</groupId> | ||
<artifactId>citationsentimentclassifier</artifactId> | ||
<version>1.0-SNAPSHOT</version> | ||
|
||
<repositories> | ||
<repository> | ||
<id>maven</id> | ||
<url>https://repo1.maven.org/maven2</url> | ||
</repository> | ||
</repositories> | ||
<dependencies> | ||
<dependency> | ||
<groupId>nz.ac.waikato.cms.weka</groupId> | ||
<artifactId>weka-stable</artifactId> | ||
<version>3.6.6</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>nz.ac.waikato.cms.weka</groupId> | ||
<artifactId>LibSVM</artifactId> | ||
<version>1.0.3</version> | ||
</dependency> | ||
</dependencies> | ||
</project> |
75 changes: 75 additions & 0 deletions
75
src/main/java/com/chaoticity/citationsentiment/CitationSentimentClassifier.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* To change this template, choose Tools | Templates | ||
* and open the template in the editor. | ||
*/ | ||
package com.chaoticity.citationsentiment; | ||
|
||
import weka.classifiers.Evaluation; | ||
import weka.classifiers.functions.LibSVM; | ||
import weka.core.Instances; | ||
import weka.core.converters.ConverterUtils; | ||
import weka.core.tokenizers.NGramTokenizer; | ||
import weka.core.tokenizers.WordTokenizer; | ||
import weka.filters.Filter; | ||
import weka.filters.unsupervised.attribute.StringToWordVector; | ||
|
||
import java.util.Random; | ||
|
||
/** | ||
* Code and data for citation sentiment classification reported in http://www.aclweb.org/anthology/P11-3015 | ||
* The file test.arff contains only the test set with dependency triplets generated with Stanford CoreNLP | ||
* Full corpus available at http://www.cl.cam.ac.uk/~aa496/citation-sentiment-corpus | ||
* | ||
* @author Awais Athar | ||
*/ | ||
public class CitationSentimentClassifier { | ||
|
||
|
||
/** | ||
* @param args the command line arguments | ||
*/ | ||
public static void main(String[] args) throws Exception { | ||
ConverterUtils.DataSource source = new ConverterUtils.DataSource("test.arff"); | ||
Instances data = source.getDataSet(); | ||
|
||
// Set class attribute | ||
data.setClassIndex(data.attribute("@@class@@").index()); | ||
|
||
// delete unused attributes | ||
data.deleteAttributeAt(1); | ||
data.deleteAttributeAt(2); | ||
|
||
// split dependencies on space | ||
StringToWordVector unigramFilter = new StringToWordVector(); | ||
unigramFilter.setInputFormat(data); | ||
unigramFilter.setIDFTransform(true); | ||
unigramFilter.setAttributeIndices("3"); | ||
WordTokenizer whitespaceTokenizer = new WordTokenizer(); | ||
whitespaceTokenizer.setDelimiters(" "); | ||
unigramFilter.setTokenizer(whitespaceTokenizer); | ||
data = Filter.useFilter(data,unigramFilter); | ||
|
||
// make trigrams from citation sentences | ||
StringToWordVector trigramFilter = new StringToWordVector(); | ||
trigramFilter.setInputFormat(data); | ||
trigramFilter.setIDFTransform(true); | ||
trigramFilter.setAttributeIndices("2"); | ||
NGramTokenizer tokenizer = new NGramTokenizer(); | ||
tokenizer.setNGramMinSize(1); | ||
tokenizer.setNGramMaxSize(3); | ||
trigramFilter.setTokenizer(tokenizer); | ||
data = Filter.useFilter(data,trigramFilter); | ||
|
||
// Train and test 10x cross-validation | ||
int folds = 10; | ||
LibSVM svm = new LibSVM(); | ||
svm.setCost(1000); | ||
Evaluation eval = new Evaluation(data); | ||
eval.crossValidateModel(svm, data, folds, new Random(1)); | ||
System.out.println(eval.toMatrixString()); | ||
System.out.println(eval.toSummaryString()); | ||
System.out.println(eval.toClassDetailsString()); | ||
} | ||
|
||
|
||
} |
Oops, something went wrong.