01/10/2018, 09:48

Code bị lỗi "NameError: name 'createDataSet' is not defined"

from math import log
import operator

def createDataset():
    dataSet = [[79, 24.7252, 0.818, 9.170, 37.383, yes],
               [89, 25.9909, 0.871, 7.561, 24.685, yes],
               [70, 25.3934, 1.358, 5.347, 40.620, yes],
               [88, 23.2254, 0.714, 7.354, 56.782, yes],
               [85, 24.6097, 0.748, 6.760, 58.358, yes],
               [68, 25.0762, 0.935, 4.939, 67.123, no],
               [70, 19.8839, 1.040, 4.321, 26.399, no],
               [69, 25.0593, 1.002, 4.212, 47.515, no],
               [74, 25.6544, 0.987, 5.605, 26.132, no],
               [79, 19.9594, 0.863, 5.204, 60.267, no],
               [76, 22.5981, 0.889, 4.704, 27.026, yes],
               [76, 26.4236, 0.886, 5.115, 43.256, no],
               [62, 20.3223, 0.889, 5.741, 51.097, yes],
               [69, 19.3698, 0.790, 3.880, 49.678, no],
               [72, 24.2215, 0.988, 5.844, 41.672, no],
               [67, 32.1120, 1.119, 4.160, 60.356, no],
               [74, 25.3934, 1.037, 6.728, 40.225, no],
               [69, 23.8895, 0.893, 4.203, 27.334, no],
               [78, 24.6755, 0.850, 7.347, 28.893, yes],
               [71, 27.1314, 0.790, 4.467, 38.173, no],
               [74, 23.0518, 0.597, 4.835, 35.141, yes],
               [76, 23.4568, 0.889, 5.345, 27.568, yes],
               [75, 23.5457, 0.803, 3.773, 36.726, yes],
               [70, 23.3234, 0.919, 3.672, 40.093, no],
               [69, 22.8625, 0.870, 4.552, 29.627, yes],
               [71, 22.0384, 0.811, 4.286, 30.380, no],
               [80, 24.6914, 0.859, 5.706, 37.529, yes],
               [79, 26.8519, 0.867, 3.563, 43.924, yes],
               [72, 27.1809, 0.717, 3.760, 39.714, no],
               [78, 23.9512, 0.822, 3.453, 27.294, no],
               [80, 28.3874, 1.004, 5.948, 33.376, yes],
               [79, 23.5102, 0.738, 4.193, 65.640, no],
               [67, 19.7232, 0.865, 4.443, 36.252, yes],
               [84, 27.4406, 0.808, 5.482, 33.539, yes],
               [78, 28.6661, 0.955, 8.815, 42.398, no],
               [65, 23.7812, 0.912, 4.704, 39.254, no],
               [70, 23.4493, 0.857, 4.138, 75.947, no],
               [67, 25.5354, 0.855, 3.727, 41.851, no],
               [74, 24.7409, 0.959, 3.967, 42.293, no],
               [73, 22.2291, 1.036, 4.438, 40.222, no],
               [74, 34.4753, 1.092, 7.271, 45.434, no],
               [68, 32.1929, 0.000, 4.269, 50.841, yes],
               [80, 23.3355, 0.759, 4.856, 31.114, no],
               [78, 22.7903, 0.757, 4.831, 73.343, no],
               [79, 24.6097, 0.671, 4.870, 68.924, yes],
               [72, 27.5802, 0.814, 3.021, 27.088, no,]
               [67, 30.1205, 1.101, 7.538, 35.487, yes],
               [70, 25.8166, 0.818, 3.564, 36.001, no],
               [69, 30.4218, 1.088, 3.826, 33.833, no],
               [67, 28.7132, 0.934, 3.996, 56.167, no],
               [74, 34.5429, 0.969, 6.762, 43.099, no],
               [71, 24.6097, 0.794, 4.350, 39.023, no],
               [67, 23.5294, 0.830, 3.176, 36.595, no],
               [67, 25.6173, 1.057, 3.738, 32.550, no],
               [65, 25.3086, 1.160, 3.060, 44.757, no],
               [66, 24,8358, 0.811, 3,263, 26.941, no],
               [69, 22.3094, 0.977, 3,106, 27.951, no],
               [72, 26,5285, 1.063, 6.970, 41.188, no],
               [75, 25.8546, 1.091, 4.798, 36.045, no],
               [70, 20.6790, 0.741, 3.908, 30.198, no],
               [74, 28.3675, 1.045, 4.784, 31.339, no],
               [71, 29.0688, 1,066, 4,527, 24.252, no],
               [65, 23.9995, 0.841, 3.089, 79.910, no],
               [77, 22.9819, 1.015, 4.041, 57.147, no],
               [67, 33.3598, 1.129, 7.239, 67.103, yes],
               [66, 27.1314, 1.030, 4.096, 29.435, no],
               [70, 24.7676, 0,896, 4.352, 44.291, no],
               [70, 24.4193, 1.106, 2.823, 37.348, no]]
    labels = ['age', 'bmi', 'bmd', 'ictp', 'pinp']
    # change to discrete values
    return dataSet, labels

def calcShannonEnt(dataSet):
    numEntries = len(dataSet)
    labelCounts = {}
    for featVec in dataSet:  # the the number of unique elements and their occurance
        currentLabel = featVec[-1]
        if currentLabel not in labelCounts.keys(): labelCounts[currentLabel] = 0
        labelCounts[currentLabel] += 1
    shannonEnt = 0.0
    for key in labelCounts:
        prob = float(labelCounts[key]) / numEntries
        shannonEnt -= prob * log(prob, 2)  # log base 2
    return shannonEnt


def splitDataSet(dataSet, axis, value):
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]  # chop out axis used for splitting
            reducedFeatVec.extend(featVec[axis + 1:])
            retDataSet.append(reducedFeatVec)
    return retDataSet


def chooseBestFeatureToSplit(dataSet):
    numFeatures = len(dataSet[0]) - 1  # the last column is used for the labels
    baseEntropy = calcShannonEnt(dataSet)
    bestInfoGain = 0.0;
    bestFeature = -1
    for i in range(numFeatures):  # iterate over all the features
        featList = [example[i] for example in dataSet]  # create a list of all the examples of this feature
        uniqueVals = set(featList)  # get a set of unique values
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calcShannonEnt(subDataSet)


        infoGain = baseEntropy - newEntropy  # calculate the info gain; ie reduction in entropy
        """
        print("feature : " + str(i))
        print("baseEntropy : "+str(baseEntropy))
        print("newEntropy : " + str(newEntropy))
        print("infoGain : " + str(infoGain))
        """
        if (infoGain > bestInfoGain):  # compare this to the best gain so far
            bestInfoGain = infoGain  # if better than current best, set to best
            bestFeature = i
    return bestFeature  # returns an integer


def majorityCnt(classList):
    classCount = {}
    for vote in classList:
        if vote not in classCount.keys(): classCount[vote] = 0
        classCount[vote] += 1
    sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def createTree(dataSet, labels):
    # extracting data
    classList = [example[-1] for example in dataSet]
    if classList.count(classList[0]) == len(classList):
        return classList[0]  # stop splitting when all of the classes are equal
    if len(dataSet[0]) == 1:  # stop splitting when there are no more features in dataSet
        return majorityCnt(classList)
    # use Information Gain
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]

    #build a tree recursively
    myTree = {bestFeatLabel: {}}
    #print("myTree : "+labels[bestFeat])
    del (labels[bestFeat])
    featValues = [example[bestFeat] for example in dataSet]
    #print("featValues: "+str(featValues))
    uniqueVals = set(featValues)
    #print("uniqueVals: " + str(uniqueVals))
    for value in uniqueVals:
        subLabels = labels[:]  # copy all of labels, so trees don't mess up existing labels
        #print("subLabels"+str(subLabels))
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)
        #print("myTree : " + str(myTree))
    return myTree


def classify(inputTree, featLabels, testVec):
    firstStr = inputTree.keys()[0]
    #print("fistStr : "+firstStr)
    secondDict = inputTree[firstStr]
    #print("secondDict : " + str(secondDict))
    featIndex = featLabels.index(firstStr)
    #print("featIndex : " + str(featIndex))
    key = testVec[featIndex]
    #print("key : " + str(key))
    valueOfFeat = secondDict[key]
    #print("valueOfFeat : " + str(valueOfFeat))
    if isinstance(valueOfFeat, dict):
        #print("is instance: "+str(valueOfFeat))
        classLabel = classify(valueOfFeat, featLabels, testVec)
    else:
        #print("is Not instance: " + valueOfFeat)
        classLabel = valueOfFeat
    return classLabel


def storeTree(inputTree, filename):
    import pickle
    fw = open(filename, 'w')
    pickle.dump(inputTree, fw)
    fw.close()


def grabTree(filename):
    import pickle
    fr = open(filename)
    return pickle.load(fr)

# collect data
myDat, labels = createDataSet()

#build a tree
mytree = createTree(myDat, labels)
print(mytree)

#run test
answer = classifi(mytree, ['age', 'bmi', 'bmd', 'ictp', 'pinp'], [79, 24.7252, 0.818, 9.170, 37.383])
print(" Ban "+answer+" bi nguy co gay xuong")

đoạn code của m bị lôi:
Traceback (most recent call last):
  File "C:UsersphungDesktopve.py", line 198, in <module>
    myDat, labels = createDataSet()
NameError: name 'createDataSet' is not defined

M.n xem giùm m với ạ