日撸java_day61-62

news/2024/11/24 13:33:29/

决策树

package machineLearning.decisiontree;import weka.core.Instance;
import weka.core.Instances;import java.io.FileReader;
import java.util.Arrays;/*** ClassName: ID3* Package: machineLearning.decisiontree* Description:  The ID3 decision tree inductive algorithm.** @Author: luv_x_c* @Create: 2023/8/7 14:55*/
public class ID3 {/*** The data.*/Instances dataset;/*** Is the dataset pure(Only one label)?*/boolean pure;/*** The number of classes. For binary classification it is 2.*/int numClasses;/*** Available instances. Other instances do not belong this branch.*/int[] availableInstances;/*** Available attributes. Other attributes have been selected int the path from the root.*/int[] availableAttributes;/*** The selected attribute.*/int splitAttributes;/*** The children nodes.*/ID3[] children;/*** My label. Inner nodes also have a label. For example, <outlook =sunny ,humidity = high > never appear it the* training data, but <humidity =high> is valid  in other cases.*/int label;/*** The prediction, including queried and predicted labels.*/int[] predicts;/*** Small block cannot be split further.*/static int smallBlockThreshold = 3;/*** The constructor.** @param paraFileName The given file.*/public ID3(String paraFileName) {dataset = null;try {FileReader fileReader = new FileReader(paraFileName);dataset = new Instances(fileReader);fileReader.close();} catch (Exception ee) {System.out.println("Cannot read the file: " + paraFileName + "\r\n" + ee);System.exit(0);}// Of trydataset.setClassIndex(dataset.numAttributes() - 1);numClasses = dataset.classAttribute().numValues();availableInstances = new int[dataset.numInstances()];for (int i = 0; i < availableInstances.length; i++) {availableInstances[i] = i;}//Of for iavailableAttributes = new int[dataset.numAttributes() - 1];for (int i = 0; i < availableAttributes.length; i++) {availableAttributes[i] = i;}// OF for i//Initialize.children = null;label = getMajorityClass(availableInstances);pure = pureJudge(availableInstances);}// Of the first constructor/*** The constructor.*/public ID3(Instances paraDataset, int[] paraAvailableInstances, int[] paraAvailableAttributes) {//Copy its reference instead of clone the availableInstances.dataset = paraDataset;availableInstances = paraAvailableInstances;availableAttributes = 
paraAvailableAttributes;//Initialize.children = null;label = getMajorityClass(availableInstances);pure = pureJudge(availableInstances);}// OF the second constructor/*** Is the given block pure?** @param paraBlock The block.* @return True if pure.*/public boolean pureJudge(int[] paraBlock) {pure = true;for (int i = 1; i < paraBlock.length; i++) {if (dataset.instance(paraBlock[i]).classValue() != dataset.instance(paraBlock[0]).classValue()) {pure = false;break;}//Of if}//Of for ireturn pure;}//Of pureJudge/*** Compute the majority class of the given block for voting.** @param paraBlock The block.* @return The majority class.*/public int getMajorityClass(int[] paraBlock) {int[] tempClassCounts = new int[dataset.numClasses()];for (int i = 0; i < paraBlock.length; i++) {tempClassCounts[(int) dataset.instance(paraBlock[i]).classValue()]++;}//OF for iint resultMajorityClass = -1;int tempMaxCount = -1;for (int i = 0; i < tempClassCounts.length; i++) {if (tempMaxCount < tempClassCounts[i]) {resultMajorityClass = i;tempMaxCount = tempClassCounts[i];}//Of if}//Of for ireturn resultMajorityClass;}//Of getMajorityClass/*** Select the best attribute.** @return The best attribute index.*/public int selectBestAttribute() {splitAttributes = -1;double tempMinimalEntropy = 10000;double tempEntropy;for (int i = 0; i < availableAttributes.length; i++) {tempEntropy = conditionalEntropy(availableAttributes[i]);if (tempMinimalEntropy > tempEntropy) {tempMinimalEntropy = tempEntropy;splitAttributes = availableAttributes[i];}//Of if}//Of for ireturn splitAttributes;}//Of selectBestAttribute/*** Compute the conditional entropy of an attribute.** @param paraAttribute The given attribute.* @return The entropy.*/public double conditionalEntropy(int paraAttribute) {// Step1. 
Statistics.int tempNumClasses = dataset.numClasses();int tempNumValues = dataset.attribute(paraAttribute).numValues();int tempNumInstances = availableInstances.length;double[] tempValueCounts = new double[tempNumValues];double[][] tempCountMatrix = new double[tempNumValues][tempNumClasses];int tempClass, tempValue;for (int i = 0; i < tempNumInstances; i++) {tempClass = (int) dataset.instance(availableInstances[i]).classValue();tempValue = (int) dataset.instance(availableInstances[i]).value(paraAttribute);tempValueCounts[tempValue]++;tempCountMatrix[tempValue][tempClass]++;}//Of for i// Step2.double resultEntropy = 0;double tempEntropy, tempFraction;for (int i = 0; i < tempNumValues; i++) {if (tempValueCounts[i] == 0) {continue;}//Of iftempEntropy = 0;for (int j = 0; j < tempNumClasses; j++) {tempFraction = tempCountMatrix[i][j] / tempValueCounts[i];if (tempFraction == 0) {continue;}//Of iftempEntropy += -tempFraction * Math.log(tempFraction);}//Of for jresultEntropy += tempValueCounts[i] / tempNumInstances * tempEntropy;}//Of for ireturn resultEntropy;}//Of conditionalEntropy/*** Split the data according to the given attribute.** @param paraAttribute The given attribute.* @return The blocks.*/public int[][] splitData(int paraAttribute) {int tempNumValues = dataset.attribute(paraAttribute).numValues();int[][] resultBlocks = new int[tempNumValues][];int[] tempSizes = new int[tempNumValues];// First scan to count the size of each block.int tempValue;for (int i = 0; i < availableInstances.length; i++) {tempValue = (int) dataset.instance(availableInstances[i]).value(paraAttribute);tempSizes[tempValue]++;}//Of for i// Allocate space.for (int i = 0; i < tempNumValues; i++) {resultBlocks[i] = new int[tempSizes[i]];}//Of for i// Second scan to fill.Arrays.fill(tempSizes, 0);for (int i = 0; i < availableInstances.length; i++) {tempValue = (int) dataset.instance(availableInstances[i]).value(paraAttribute);// Copy dataresultBlocks[tempValue][tempSizes[tempValue]] = 
availableInstances[i];tempSizes[tempValue]++;}// OF for ireturn resultBlocks;}//Of splitData/*** Build the tree recursively.*/public void buildTree() {if (pureJudge(availableInstances)) {return;}//OF ifif (availableInstances.length <= smallBlockThreshold) {return;}//Of ifselectBestAttribute();int[][] tempSubBlocks = splitData(splitAttributes);children = new ID3[tempSubBlocks.length];//Construct the remaining attribute set.int[] tempRemainingAttributes = new int[availableAttributes.length - 1];for (int i = 0; i < availableAttributes.length; i++) {if (availableAttributes[i] < splitAttributes) {tempRemainingAttributes[i] = availableAttributes[i];} else if (availableAttributes[i] > splitAttributes) {tempRemainingAttributes[i - 1] = availableAttributes[i];}//Of if}//Of for i//Construct children.for (int i = 0; i < children.length; i++) {if ((tempSubBlocks[i] == null) || (tempSubBlocks[i].length == 0)) {children[i] = null;} else {children[i] = new ID3(dataset, tempSubBlocks[i], tempRemainingAttributes);children[i].buildTree();}//Of if}//OF for i}//OF buildTree/*** Classify an instance,** @param paraInstance The given instance.* @return The prediction.*/public int classify(Instance paraInstance) {if (children == null) {return label;}//Of ifID3 tempChild = children[(int) paraInstance.value(splitAttributes)];if (tempChild == null) {return label;}//Of ifreturn tempChild.classify(paraInstance);}//Of classify/*** Test on n testing set.** @param paraDataset The given testing set.* @return The accuracy.*/public double test(Instances paraDataset) {double tempCorrect = 0;for (int i = 0; i < paraDataset.numInstances(); i++) {if (classify(paraDataset.instance(i)) == (int) paraDataset.instance(i).classValue()) {tempCorrect++;}//Of if}//Of for ireturn tempCorrect / paraDataset.numInstances();}//Of test/*** Test on the training set.** @return The accuracy.*/public double selfTest() {return test(dataset);}//Of selfTest@Overridepublic String toString() {String resultString = "";String 
tempAttributeName = dataset.attribute(splitAttributes).name();if (children == null) {resultString += "class = " + label;} else {for (int i = 0; i < children.length; i++) {if (children[i] == null) {resultString += tempAttributeName + " = " + dataset.attribute(splitAttributes).value(i) + " : " + "class= " + label + "\r\n";} else {resultString += tempAttributeName + " = " + dataset.attribute(splitAttributes).value(i) + " : " + children[i] + "\r\n";}//OF if}//Of for i}//Of ifreturn resultString;}//OF toString/*** Test this class.*/public static void id3Test() {ID3 tempID3 = new ID3("E:\\java_code\\data\\sampledata\\weather.arff");ID3.smallBlockThreshold = 3;tempID3.buildTree();System.out.println("The tree is: \r\n" + tempID3);double tempAccuracy = tempID3.selfTest();System.out.println("The accuracy is: " + tempAccuracy);}//Of id3Test/*** The entrance of the program.** @param args Not used now.*/public static void main(String[] args) {id3Test();}//OF main
}//Of class ID3

 


http://www.ppmy.cn/news/1026986.html

相关文章

预测知识 | 神经网络、机器学习、深度学习

预测知识 | 预测技术流程及模型评价 目录 预测知识 | 预测技术流程及模型评价神经网络机器学习深度学习参考资料 神经网络 神经网络（neural network）是机器学习的一个重要分支，也是深度学习的核心算法。神经网络的名字和结构，源自…

【数据结构•堆】经典问题:k路归并

题目描述 k路归并问题：   把k个有序表合并成一个有序表。（ k < 10^4 ） 输入输出格式 输入格式： 输入数据共有 2*k+1 行。   第一行，一个整数k（ k < 10^4 ），表示有k个有序…

知识图谱基本工具Neo4j使用笔记 四 :使用csv文件批量导入图谱数据

文章目录 一、系统说明二、说明三、简单介绍1. 相关代码以及参数2. 简单示例 四、实际数据实践1. 前期准备（1） 创建一个用于测试的neo4j数据库（2）启动neo4j 查看数据库 2. 实践（1） OK 上面完成后…

从KM到Cure Models:常用生存分析方法的优缺点

一、引言 生存分析是一种用于研究个体生命长度或事件发生时间的统计方法。在许多领域中，如医学、社会学、经济学和工程学等，生存分析被广泛应用于分析个体的生存时间，并研究相关因素对生存时间的影响。通过生存分析，我们可以评估特…

hadoop3.3 50070端口起不来

有人会让你在 hdfs-site.xml中添加: <property><name>dfs.namenode.http.address</name><value>slave1:50070</value></property> 但是不起作用. 新版本name变了,应该配置如下: </configuration><property><name>dfs.h…

fastApi基础

1、fastApi简介 官方文档：https://fastapi.tiangolo.com/ 源码： https://github.com/tiangolo/fastapi 2、环境准备 安装python 安装pycharm 安装fastAPI 安装 uvicorn 查看已经安装的第三方库：pip list 查看pip 配置信息：pip co…

2308d的静态构造函数循环依赖示例

原文 //Steve: __gshared string[string] dict; shared static this() {dict ["a" : "b"]; }这里有两个论点:这不能是CRT构造器,因为它依赖于D运行时,并且认为它应该进入自己的模块是一个QoL问题,当你想要私有到类而不是私有到模块时,可为类提供它,因为语…

【python】pycharm 2023.02导入matplotlib报错

换新电脑、重装系统后，新装的pycharm 2023.02版本导入matplotlib会报错：找不到指定的模块 需要重新安装 Microsoft Visual C++ 2015 Redistributable Update 3或更高版本 下载链接：Download Microsoft Visual C++ 2015 Redistributable Update …