Multinomial (or multiclass) logistic regression (aka softmax regression) with TensorFlow
An example of solving a parameterized model with TensorFlow: we define logistic regression with multiple classes to predict, generate synthetic data for it, and train it with gradient descent.
# Multinomial (or multiclass) logistic regression (aka softmax regression) with tensorflow
import pandas as pd
import tensorflow as tf
import numpy as np

# define the static parameters for the program
NUM_FEATURES = 6      # each training sample has 6 features
NUM_CLASSES = 3       # each training label can be one of three classes (0, 1, 2)
LEARNING_RATE = 0.01  # learning rate for the optimizer
MAX_STEPS = 10000     # number of iterations to run
\(\textbf{X}\) is an \(N \times\) NUM_FEATURES dimensional matrix, where \(N\) is the number of samples; it is left as None in the placeholder below so that any batch size can be fed.
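For a single sample \(x\) (one row of \(\textbf{X}\)), softmax regression models the class probabilities as

\[
P(y = k \mid x) = \frac{\exp(x^\top w_k + b_k)}{\sum_{j=0}^{K-1} \exp(x^\top w_j + b_j)}, \qquad k = 0, \dots, K-1,
\]

where \(K\) = NUM_CLASSES, \(w_k\) is the \(k\)-th column of the weight matrix defined below and \(b_k\) the \(k\)-th bias entry. The code only computes the linear scores (the logits); the exponentiation and normalization are left to TensorFlow's loss op.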
# define placeholders for the input data matrix and labels. Label is just a 1-d vector of classes.
X = tf.placeholder(tf.float32, shape=[None, NUM_FEATURES], name="input_matrix")
Y = tf.placeholder(tf.int32, shape=[None], name="input_labels")
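Placeholders carry no data themselves; values are bound at run time through a feed_dict passed to sess.run. A minimal sketch of what such a feed looks like (the arrays here are made-up illustrations, not part of the model):

dummy_X = np.random.normal(size=[2, NUM_FEATURES])  # shape [N, NUM_FEATURES], N = 2 here
dummy_Y = np.array([0, 2])                          # shape [N], one class index per sample
# later, inside a session: sess.run(fetches, feed_dict={X: dummy_X, Y: dummy_Y})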
# define the logistic regression parameters to learn, i.e. the weights and bias (intercept).
initializer = tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32)
weights = tf.get_variable(name="weights", shape=[NUM_FEATURES, NUM_CLASSES],
                          initializer=initializer, dtype=tf.float32)
bias = tf.Variable(tf.zeros([1, NUM_CLASSES]), name="bias_or_intercept")
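Note that bias has shape [1, NUM_CLASSES] while the matrix product below has shape [N, NUM_CLASSES]; the addition works because the single bias row is broadcast across all N rows. The same broadcasting rule can be checked in plain numpy (a toy illustration, unrelated to the model variables):

a = np.zeros([4, 3])          # stands in for XW, shape [N, K]
b = np.array([[1., 2., 3.]])  # shape [1, K], like the bias row
print((a + b).shape)          # (4, 3): the bias row is added to every sample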
# define the model, simply XW + intercept; exponentiation will be done by softmax_cross_entropy_with_logits below.
logits = tf.matmul(X, weights) + bias
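Shape-wise, X is [N, NUM_FEATURES] and weights is [NUM_FEATURES, NUM_CLASSES], so logits is [N, NUM_CLASSES]: one unnormalized score per class for every sample.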
# convert the 1-d class vector to one-hot representation so that each row is a valid probability distribution.
one_hot_labels = tf.one_hot(Y, NUM_CLASSES)
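For example, with NUM_CLASSES = 3 the conversion behaves as follows (values shown as comments for illustration):

# Y = [0, 2, 1]  ->  one_hot_labels =
#   [[1., 0., 0.],
#    [0., 0., 1.],
#    [0., 1., 0.]]
# each row places all of the probability mass on the true class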
# loss, note that softmax_cross_entropy_with_logits will exponentiate the logits before normalization
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_labels)
cross_entropy_mean = tf.reduce_mean(cross_entropy)

# gradient-descent step that minimizes the mean loss; this op is run by the training loop below
train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy_mean)
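With a one-hot label vector \(p\) and predicted distribution \(q = \mathrm{softmax}(\text{logits})\), the per-sample cross-entropy

\[
H(p, q) = -\sum_{k=0}^{K-1} p_k \log q_k = -\log q_{y},
\]

reduces to the negative log-probability assigned to the true class \(y\). cross_entropy_mean averages this over the batch, so gradient descent on it performs maximum-likelihood fitting of the weights and bias.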
# get prediction accuracies
probs = tf.nn.softmax(logits)
# predicted class is just the index of the largest probability.
preds = tf.argmax(probs, axis=1)
correct_predictions = tf.equal(preds, tf.argmax(one_hot_labels, axis=1))  # boolean per sample
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
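Since softmax is monotonic, taking argmax of the raw logits would yield the same predicted class; probs is computed here because explicit probabilities are often useful to inspect. The equivalence is easy to check in numpy (illustrative only):

z = np.array([2.0, -1.0, 0.5])       # some logits for one sample
p = np.exp(z) / np.sum(np.exp(z))    # softmax
assert np.argmax(z) == np.argmax(p)  # the winning class is the same either way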
# generate some data
def gen_data(N, M, K):
    '''
    N : number of samples
    M : dimension of each sample (no. of features)
    K : number of classes
    '''
    X = np.random.normal(size=[N, M])
    W = np.random.normal(loc=5, size=[M, K])
    logits = np.exp(np.matmul(X, W))
    probs = np.copy(logits)
    S = np.sum(logits, axis=1)
    for i in range(0, len(S)):
        probs[i, :] = logits[i, :] / S[i]
    classes = np.argmax(probs, axis=1)
    classes = np.expand_dims(classes, axis=1)
    data = np.copy(X)
    data = np.concatenate((data, classes), axis=1)
    np.savetxt("data.csv", data, delimiter=",", header="A,B,C,D,E,F,Class", comments="")
# generate data
gen_data(1000, NUM_FEATURES, NUM_CLASSES)  # should write a file 'data.csv'
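It can be worth a quick sanity check that the file was written as expected, e.g. that all three classes actually occur (optional, assuming data.csv is in the working directory):

check_df = pd.read_csv('./data.csv')
print(check_df.shape)                    # expect (1000, 7): 6 feature columns + 1 class column
print(check_df['Class'].value_counts())  # rough class balance across 0, 1 and 2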
# next define a tensorflow session to run training of the model with the data.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    # read the data just generated, using pandas
    df = pd.read_csv('./data.csv')
    for step in range(MAX_STEPS):
        # shuffle the full data set, then split it into features and labels
        data = np.copy(df.values)
        np.random.shuffle(data)
        X_val = data[:, :NUM_FEATURES]
        Y_val = data[:, -1].astype(np.int32)  # class column back to integer labels
        feed_dict_val = {X: X_val, Y: Y_val}
        _, loss_v, preds_v, accuracy_v = sess.run(
            [train_step, cross_entropy_mean, preds, accuracy],
            feed_dict=feed_dict_val)
        if step % 500 == 0:
            print(step, loss_v, accuracy_v)
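Once the loop finishes, and while the with block (and hence the session) is still open, the trained model can be used for inference. A minimal sketch with hypothetical unseen inputs, indented to sit inside the block above:

    # hypothetical new samples; only X is needed to obtain predictions
    new_X = np.random.normal(size=[5, NUM_FEATURES])
    new_preds = sess.run(preds, feed_dict={X: new_X})
    print(new_preds)  # one predicted class index (0, 1 or 2) per row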