source: stack overflow
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Draw a bivariate KDE joint plot of two correlated normal samples and
# overlay a linear regression line (without scatter points) on the joint axes.
sns.set(style="white")
rs = np.random.RandomState(5)
mean = [0, 0]
cov = [(1, .5), (.5, 1)]
x1, x2 = rs.multivariate_normal(mean, cov, 500).T
x1 = pd.Series(x1, name="$X_1$")
x2 = pd.Series(x2, name="$X_2$")
# Pass the data by keyword and use `height` (the replacement for the old
# `size` keyword); current seaborn rejects positional x/y and `size`.
g = sns.jointplot(x=x1, y=x2, kind="kde", height=7, space=0)
sns.regplot(x=x1, y=x2, scatter=False, ax=g.ax_joint)
plt.show()
# Option 1: set tick positions and tick labels together in one call.
# Especially useful for bar plots or time-series plots.
plt.xticks(range(0, len(compare_cell_ls)), compare_cell_ls, rotation=45)
# Option 2 (does not work as expected): the labels end up shifted by one.
# NOTE(review): presumably because set_xticklabels only relabels the ticks that
# already exist and those need not start at position 0 — confirm; setting the
# tick positions first (ax.set_xticks) should avoid the shift.
ax.set_xticklabels(compare_cell_ls, rotation=45)
# Option 3: reuse the labels the axis currently has and only rotate them.
ax[0].set_xticklabels(ax[0].xaxis.get_majorticklabels(), rotation=45)
# Option 4 (FacetGrid): only affects the last row of plots, not every plot.
g.set_xticklabels(rotation=45)
# data format
# each row denote a gene's expression under different condition
[zhangqf7@loginview02 HuR]$ head predict_RBP_binding_combine.compare.txt|cut -f 4-7
egg 1cell 4cell 64cell
0.21742857142857144 0.34700000000000003 0.12 0.13285714285714287
0.22228571428571428 0.1551428571428571 0.03528571428571429 0.04671428571428572
0.12285714285714285 0.07571428571428572 0.027000000000000003 0.026857142857142857
0.41571428571428576 0.5638571428571428 0.34114285714285714 0.2785714285714286
0.4587142857142856 0.3832857142857143 0.40771428571428575 0.3097142857142857
0.217 0.2868571428571429 0.13699999999999998 0.14914285714285716
0.21757142857142855 0.4165714285714285 0.1558571428571429 0.15371428571428572
0.33399999999999996 0.3514285714285714 0.1827142857142857 0.17557142857142854
0.32557142857142857 0.3127142857142857 0.19657142857142856 0.2992857142857143
# Plot every row of df_plot as a thin grey trend line, then overlay the
# per-column mean in blue.
fig,ax=plt.subplots()
for i in df_plot.index:
    ax.plot(range(0, len(col_ls)), df_plot.loc[i, col_ls], color='grey', alpha=0.3, lw=0.3)
# mean value of each state
# axis=0 => mean of each column (add a new row); axis=1 => mean of each row (add a new column)
# NOTE(review): the per-row lines index with col_ls while the mean uses
# compare_cell_ls — presumably the same column list; confirm.
df_plot_mean = df_plot.loc[:, compare_cell_ls].mean(axis=0)
ax.plot(range(0, len(compare_cell_ls)), df_plot_mean, color='blue')
# No legend has been drawn at this point, in which case ax.legend_ is None and
# `ax.legend_.remove()` raises AttributeError; only remove a legend that exists.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
# show legend
plt.legend()
import matplotlib.pyplot as plt
import numpy as np
# Plot five lines and place the legend outside the axes, to the right.
x = np.arange(10)
fig = plt.figure()
ax = plt.subplot(111)
# range() replaces xrange(), which only exists in Python 2
for i in range(5):
    ax.plot(x, i * x, label='$y = %ix$'%i)
# Shrink current axis by 20%
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
# Use plot with tight mode, or plot will be cut
plt.legend(bbox_to_anchor=(1, 1), loc=2)
plt.savefig(savefn, bbox_inches='tight')
plt.plot((.1, .3))
ax.axis('square')
ax.set_xlim(0.1, 0.3)
# Setting axis limits also works on seaborn figures such as jointplot.
# `size` was renamed to `height` and `stat_func` was removed from jointplot,
# so the Pearson statistics are computed here and written onto the joint axes.
g = sns.jointplot(x='col1', y='col2', data=d_g, kind="reg", height=10)
r, p = stats.pearsonr(d_g['col1'], d_g['col2'])
g.ax_joint.text(0.05, 0.95, 'pearsonr = {:.2f}; p = {:.2g}'.format(r, p),
                ha='left', va='top', transform=g.ax_joint.transAxes)
g.ax_joint.set_xlim(0.35, 0.9)
g.ax_joint.set_ylim(0.35, 0.9)
lims = [
np.min([ax.get_xlim(), ax.get_ylim()]), # min of both axes
np.max([ax.get_xlim(), ax.get_ylim()]), # max of both axes
]
# now plot both limits against eachother
ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
ax.set_aspect('equal')
ax.set_xlim(lims)
ax.set_ylim(lims)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
Use ax.annotate()
(as suggested on Stack Overflow); it can also add marks such as arrows:
y = [2.56422, 3.77284, 3.52623, 3.51468, 3.02199]
z = [0.15, 0.3, 0.45, 0.6, 0.75]
n = [58, 651, 393, 203, 123]
fig, ax = plt.subplots()
ax.scatter(z, y)
for i, txt in enumerate(n):
ax.annotate(txt, (z[i], y[i]))
Can also use plt.text()
function:
# with x,y denote absolute coordinates
plt.text(x, y, s, fontsize=12)
# with transform to project the axes
# 这里的x,y就是相对于坐标轴的哪个位置的
plt.text(0.5, 0.5, str, ha='center',va='center', transform=ax.transAxes)
Non-overlapped text based on module adjustText
as example here:
# here is a very useful library to adjust overlapped text
# https://stackoverflow.com/questions/19073683/matplotlib-overlapping-annotations-text
# https://github.com/Phlya/adjustText
from adjustText import adjust_text
texts = []
for x, y, s in zip(xs, ys, ss):
texts.append(plt.text(x, y, s))
adjust_text(texts, only_move={'text': 'y'})
# Draw a horizontal or vertical line. When only y (or x) is given, the line
# spans the whole axes.
# When xmin/xmax (ymin/ymax) are given, only that fraction of the axes is
# covered; these values lie within [0, 1].
# https://stackoverflow.com/questions/16930328/vertical-horizontal-lines-in-matplotlib/16930526
# The `hold` keyword was removed from matplotlib, so it is no longer passed.
plt.axhline(y=0, xmin=0, xmax=1, **kwargs)
plt.axvline(x=0, ymin=0, ymax=1, **kwargs)
# Connect two arbitrary points directly by giving the coordinates of both
plt.plot((x1, x2), (y1, y2), 'k-')
plt.axvspan(3, 6, color='red', alpha=0.5)
im = plt.imread('grace_hopper.jpg')
newax = fig.add_axes([0.8, 0.8, 0.2, 0.2], anchor='NE', zorder=-1)
newax.imshow(im)
newax.axis('off')
比如下面的代码,可以把多个iteration的训练过程的loss和accuracy画在一个pdf文件中:
from matplotlib.backends.backend_pdf import PdfPages
def plot_history(history, pdf):
    """Append the accuracy and loss curves of one training run to a PDF.

    Two pages are written through ``pdf.savefig``: the first plots
    ``history['acc']`` and ``history['val_acc']``, the second plots
    ``history['loss']`` and ``history['val_loss']``.

    Args:
        history: Mapping that provides the keys ``'acc'``, ``'val_acc'``,
            ``'loss'`` and ``'val_loss'``; each value is passed to ``plot``.
        pdf: Object exposing ``savefig(figure)``, e.g. an open ``PdfPages``.

    Raises:
        KeyError: If one of the four keys is missing from ``history``.
    """
    # (title, y-axis label, training key, validation key) for each page
    panels = (
        ('Model accuracy', 'Accuracy', 'acc', 'val_acc'),
        ('Model loss', 'Loss', 'loss', 'val_loss'),
    )
    for title, ylabel, train_key, val_key in panels:
        fig, ax = plt.subplots()
        # Plot training & validation values
        ax.plot(history[train_key])
        ax.plot(history[val_key])
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        ax.legend(['Train', 'Test'], loc='upper left')
        pdf.savefig(fig)
        # Close the figure once it is saved; otherwise every call leaves
        # open figures behind and memory grows with the number of iterations.
        plt.close(fig)
# Write the curves of every iteration into one multi-page PDF.
# PdfPages is referenced by its imported name: the bare `matplotlib` module is
# not imported in this snippet, so `matplotlib.backends...` is a NameError.
# The context manager closes the PDF even if an iteration raises.
with PdfPages(plot_savefn) as pdf:
    for ite in range(n_ite):
        X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.4,)
        # NOTE(review): fit() receives only X_train — confirm clf does not need y_train
        clf.fit(X_train)
        history = clf.history_
        plot_history(history, pdf)
        # plot_history opens two figures per call; close all of them,
        # not just the current one
        plt.close('all')
可以使用ax.text
函数,在特定的位置添加注释。一个常用的场景是标注显著性,比如pvalue<=0.01的标注为**
,但是需要注意的是,如果旋转为垂直方向,**
通常是不会和其提供的x坐标位置对齐,因为*
本身是不和字母对齐的,这个时候可以选用其他的字符,比如+#
等:
d1 = pd.DataFrame({'0':[1,2,3,4], '1':[4,5,6,5]})
fig,ax=plt.subplots(1,2,figsize=(16,6))
ax[0].plot(d1['0'], marker='.')
for n,i in enumerate(d1['0']):
ax[0].text(n, i+0.1, '*A*+#', va='bottom', ha='center', rotation='vertical', size='xx-large')
ax[1].plot(d1['0'], marker='.')
for n,i in enumerate(d1['0']):
ax[1].text(n, i+0.1, '*A*+#', va='bottom', ha='center', rotation=0, size='xx-large')
As discussed here:
Based on manual setup:
import seaborn as sns, matplotlib.pyplot as plt
tips = sns.load_dataset("tips")
sns.boxplot(x="day", y="total_bill", data=tips, palette="PRGn")
# statistical annotation
x1, x2 = 2, 3 # columns 'Sat' and 'Sun' (first column: 0, see plt.xticks())
y, h, col = tips['total_bill'].max() + 2, 2, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, "ns", ha='center', va='bottom', color=col)
plt.show()
Based on repo statannot
, which works for seaborn boxplot
only:
import matplotlib.pyplot as plt
import seaborn as sns
from statannot import add_stat_annotation
sns.set(style="whitegrid")
df = sns.load_dataset("tips")
x = "day"
y = "total_bill"
order = ['Sun', 'Thur', 'Fri', 'Sat']
ax = sns.boxplot(data=df, x=x, y=y, order=order)
add_stat_annotation(ax, data=df, x=x, y=y, order=order,
boxPairList=[("Thur", "Fri"), ("Thur", "Sat"), ("Fri", "Sun")],
test='Mann-Whitney', textFormat='star', loc='outside', verbose=2)
# KDE joint plot of columns m1/m2 with a regression line and a text box that
# reports Pearson's R, the p-value and the number of rows.
g = sns.jointplot(x='m1',y='m2',data=df,kind='kde', xlim=(0.0,0.5), ylim=(0.0,0.5), height=8, ratio=5)
# regplot requires x/y as keywords in current seaborn
sns.regplot(x=df['m1'], y=df['m2'], scatter=False, ax=g.ax_joint)
r,p = stats.pearsonr(df['m1'],df['m2'])
s = 'R = {:.2f}\nP = {:.2e}\nN = {}'.format(r,p,df.shape[0])
# transAxes: (0.05, 0.9) is a position relative to the axes, not a data coordinate
g.ax_joint.text(0.05, 0.9, s, ha='left', va='top', size=20, transform=g.ax_joint.transAxes)
参考这里
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
官网例子:
import numpy as np
import matplotlib.pyplot as plt
# Create some mock data
t = np.arange(0.01, 10.0, 0.01)
data1 = np.exp(t)
data2 = np.sin(2 * np.pi * t)
fig, ax1 = plt.subplots()
color = 'tab:red'
ax1.set_xlabel('time (s)')
ax1.set_ylabel('exp', color=color)
ax1.plot(t, data1, color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis
color = 'tab:blue'
ax2.set_ylabel('sin', color=color) # we already handled the x-label with ax1
ax2.plot(t, data2, color=color)
ax2.tick_params(axis='y', labelcolor=color)
fig.tight_layout() # otherwise the right y-label is slightly clipped
plt.show()
# get python default color list
# https://stackoverflow.com/questions/42086276/get-default-line-colour-cycle
# color_ls = plt.rcParams['axes.prop_cycle'].by_key()['color']
# seaborn color list
color_stages = sns.color_palette('Set1',n_colors=7, desat=0.8)
my_pal = {'egg':color_stages[0], '1cell': color_stages[1], '4cell': color_stages[2], '64cell': color_stages[3], '1K': color_stages[4], 'sphere':color_stages[5], 'shield':color_stages[6]}
sns.boxplot(x='cell', y='gini', data=df_save_all, ax=ax[0], palette=file_info_dict['my_pal'])
### 指定height_ratios,一般根据每个集合具有的feature的数目
fig, ax = plt.subplots(3,1,figsize=(32, 25), gridspec_kw = {'height_ratios':[19, 15, 7]}, sharey=False, sharex=True)
### plot heatmap
h1 = sns.heatmap(df_plot_all[function_ls[1:]].T,linecolor='grey', linewidths=0.1, cbar=False, square=True, cmap="Greens", ax=ax[0])
h2 = sns.heatmap(df_plot_all[localization_ls].T,linecolor='grey', linewidths=0.1, cbar=False, square=True, cmap="Greens", ax=ax[1])
h3 = sns.heatmap(df_plot_all[domain_ls].T,linecolor='grey', linewidths=0.1, cbar=False, square=True, cmap="Greens", ax=ax[2])
### keep one xlabel for all, also keep yticklabels
# h1.set(xlabel='', ylabel='Heatmap1')
# h2.set(xlabel='', ylabel='Heatmap2')
# h3.set(xlabel='Columns', ylabel='Heatmap3')
### keep one xlabel for all, remove yticklabels
h1.set(xlabel='', ylabel='Heatmap1', yticks=[])
h2.set(xlabel='', ylabel='Heatmap2', yticks=[])
h3.set(xlabel='Columns', ylabel='Heatmap3', yticks=[], xticks=[])
### set yticklabels on the right
ax[0].yaxis.tick_right()
ax[0].set_yticklabels(ax[0].yaxis.get_majorticklabels(), rotation=0)
ax[1].yaxis.tick_right()
ax[1].set_yticklabels(ax[1].yaxis.get_majorticklabels(), rotation=0)
ax[2].yaxis.tick_right()
ax[2].set_yticklabels(ax[2].yaxis.get_majorticklabels(), rotation=0)
ax[2].set_xticklabels(ax[2].xaxis.get_majorticklabels(), rotation=90)
### set tick label color individually
### https://stackoverflow.com/questions/21936014/set-color-for-xticklabels-individually-in-matplotlib
# color_ls = ['red']*10+['black']*19
# [t.set_color(i) for (i,t) in zip(color_ls,ax[2].xaxis.get_ticklabels())]
plt.tight_layout()
plt.savefig('./test.png')
plt.close()
As discussed here
import numpy as np; np.random.seed(0)
import seaborn as sns; sns.set()
uniform_data = np.random.rand(10, 12)
ax = sns.heatmap(uniform_data)
ax.invert_yaxis()
No size
argument in sns.heatmap
function, can only set by plt
as discussed here
fig,ax = plt.subplots(figsize=(10, 16))
sns.heatmap(..., ax=ax)
使用annot_kws
字典参数,参考这里:
sns.heatmap(corrmat,
vmin=corrmat.values.min(),
vmax=1, square=True, cmap="YlGnBu",
linewidths=0.1, annot=True,
annot_kws={"fontsize":8})
def merge_symmetry_df_to_diagonal_lower(df=None):
    """Add a square frame to its transpose while keeping the original diagonal.

    Every off-diagonal cell of the result holds ``df[i, j] + df[j, i]``; the
    diagonal is restored to the diagonal of ``df`` so it is not doubled. The
    input frame is not modified.

    Args:
        df: Square DataFrame whose index and columns carry the same labels.
            When ``None``, a built-in 3x3 example matrix is used.

    Returns:
        A new DataFrame with the summed values and the original diagonal.
    """
    if df is None:
        df = pd.DataFrame({0:[1, 0.3, 0.8], 1:[0.3, 1, 0.3], 2:[0.8, 0.3, 1]})
    df_twice = df + df.T
    # Work on an explicit copy: assigning through `.values` is not guaranteed
    # to write back into the frame, and indexing with a list of index arrays
    # (`values[[idx] * 2]`) is rejected by modern NumPy.
    merged = df_twice.to_numpy(copy=True)
    np.fill_diagonal(merged, np.diagonal(df.to_numpy()))
    return pd.DataFrame(merged, index=df_twice.index, columns=df_twice.columns)
d1 = pd.DataFrame({0:[1, 0.3, 0.8], 1:[0.3, 1, 0.3], 2:[0.8, 0.3, 1]})
d = merge_symmetry_df_to_diagonal_lower()
fig, ax = plt.subplots(1,3, figsize=(12,3))
sns.heatmap(d1, ax=ax[0], square=True, annot=True)
sns.heatmap(d, ax=ax[1], square=True, annot=True)
mask = np.zeros_like(d)
mask[np.triu_indices_from(mask)] = True
mask[np.diag_indices_from(mask)] = False
sns.heatmap(d, ax=ax[2], square=True, annot=True, mask=mask)
plt.tight_layout()
参考这里直接设置背景颜色即可:
fig,ax=plt.subplots(figsize=(10,20))
g = sns.heatmap(df_3, xticklabels=False, yticklabels=False, cmap="summer")
g.set_facecolor('black')
/Applications/Inkscape.app/Contents/Resources/bin/inkscape -l Python_graph.svg Python_graph.pdf
# auto rows and columns
montage *png out.pdf
# use filename to label each image
montage -label '%f' * out.pdf
# 4 columns x multiple rows
montage *.png -mode concatenate -tile 4x out.pdf
# by default: one pdf per page
convert *pdf merge.pdf
# 和合并图片一样,可以指定行列数(-tile)
montage *pdf -mode concatenate merge.pdf
最近在使用Cytoscape做一些网络的可视化,作为一个老牌的工具,在性能和功能上确实很好。同时,其小组也开发了py2cytoscape工具,通过REST访问客户端,实现编程式的网络可视化。通过结合其他的网络分析模块(比如networkx),可以为节点添加更多的特征,以承载更多的信息。
导入相关的模块和基本的配置:
from py2cytoscape.data.cynetwork import CyNetwork
from py2cytoscape.data.cyrest_client import CyRestClient
from py2cytoscape.data.style import StyleUtil
import py2cytoscape.util.cytoscapejs as cyjs
import py2cytoscape.cytoscapejs as renderer
import networkx as nx
import pandas as pd
import json
import os
import imageio
import scipy
清空当前的session:
cy = CyRestClient()
cy.session.delete()
读入一个pandas dataframe:
f = '/Users/gongjing/Dropbox/Zebrafish_development/results/plots/cytoscape/nx_merge_12345.txt'
df = pd.read_csv(f, header=0, sep='\t')
df.head()
source target egg 1cell 4cell 64cell 1k
0 NM_212843 NM_183349 NO YES NO NO NO
1 NM_212840 NM_001114579 NO NO NO YES NO
2 NM_212840 NM_001098252 NO NO YES NO NO
3 NM_212841 NM_001017830 NO YES NO YES NO
4 NM_212846 NM_001190758 NO NO NO YES NO
从pandas dataframe读入为一个网络:
# Build a networkx graph from the edge table (one edge per row, with the
# per-stage columns stored as edge attributes) and send it to Cytoscape.
# from_pandas_edgelist replaces from_pandas_dataframe, which was removed in networkx 2.x.
nx_inter_RRI = nx.from_pandas_edgelist(df, 'source', 'target', edge_attr=['egg', '1cell', '4cell','64cell', '1k'])
net_module = cy.network.create_from_networkx(nx_inter_RRI)
控制网络的layout:
cy.layout.apply(name='circular', network=net_module)
读入节点的注释数据:
node_anno = node_anno_dict['all']
df_node_anno = pd.read_csv(node_anno, header=0, sep='\t')
df_node_anno.head()
name type first_occur node_degree egg 1cell 4cell 64cell 1K
0 NM_212843 mRNA 1cell 1 NO YES NO NO NO
1 NM_212840 mRNA 4cell 2 NO NO YES YES NO
2 NM_212841 mRNA 1cell 1 NO YES NO YES NO
3 NM_212846 mRNA egg 2 YES YES NO YES YES
4 NM_212844 mRNA 1K 1 NO NO NO NO YES
更新node table, 属性列用于后面网络特征的映射:
net_module.update_node_table(df=df_node_anno, network_key_col='name',data_key_col='name')
定义颜色集合:
RNA_type_ls = ['mRNA', 'lncRNA', 'miRNA', 'misc_RNA', 'pseudogene', 'rRNA', 'snRNA', 'snoRNA', 'other']
RNA_type_color_ls = ['202,75,78', '83,169,102', '205,185,111', '98,180,208', '129,112,182', '238,130,238',
'255,140,0', '74,113,178', '169,169,169']
RNA_type_color_dict = {i:j for i,j in zip(RNA_type_ls, RNA_type_color_ls)}
给不同类型的节点上不同的颜色:
# Create a visual style that maps the node-table column 'type' to a fill
# colour, then apply it to the network.
my_module_style = cy.style.create('RRI Module Style')
my_module_style.create_discrete_mapping(column='type',vp='NODE_FILL_COLOR',
                                        col_type='String',mappings=RNA_type_color_dict)
# Apply to net_module, the network built in this workflow; `n` is not defined
# at this point of the walkthrough.
cy.style.apply(my_module_style, net_module)
如果已经通过cytoscape的菜单,把某一次的操作保存为了.cys文件,也可以直接load进来,获取对应的网络,并更新node table, 应用新的配置等:
cy = CyRestClient()
cy.session.delete()
# 打开已有的文件
mysession = cy.session.open('/Users/gongjing/Dropbox/Zebrafish_development/results/plots/cytoscape/all_dynamic_network.cys')
# 获取当前网络的suid
all_suid = cy.network.get_all()
# 获取当前的网络,以操作
n = cy.network.create(all_suid[0])
# 更新当前网络的节点信息
n.update_node_table(df=df_node_anno, network_key_col='name',data_key_col='name')
最近,google在其开发网站上,公开了用于内部人员进行机器学习培训的材料,可以快速帮助了解机器学习及其框架TensorFlow。量子位提供了一个相关材料的连接(别翻墙了,谷歌机器学习速成课25讲视频全集在此)。近期会学习这个系列的材料,做一点后续的笔记。
# tf.estimator API
import tensorflow as tf
# set up a classifier
classifier = tf.estimator.LinearClassifier()
# Train the model on some example data.
# what does steps mean here?
classifier.train(input_fn=train_input_fn, steps=2000)
# Use it to predict.
predictions = classifier.predict(input_fn=predict_input_fn)
表示:Representation
检查数据:1)遗漏值;2)重复样本;3)不良标签;4)不良特征值。
简化正则化(Regularization for simplicity):
经过L2正则化后的损失函数: 正则化后的损失函数,包含两项:1)原始损失函数;2)惩罚项(惩罚模型的复杂度的)。其中惩罚项含有一个系数lambda,此系数可以控制(平衡)损失和惩罚的相对比例。如果有大量的数据,且训练数据和测试数据看起来比较接近,则模型很好,不需要惩罚,可设置lambda为0;如果过拟合,可以设置一定的正值,以增大整体的损失。
稀疏性正则化(regularization for sparsity)
神经网络(Neural network)
拓展参考:
有效的机器学习准则:
今天看到这里出了100道关于pandas的题目,以加深对于pandas的理解。
这里列举了这个网站的可视化的类型和链接:
Category | Type | Description |
---|---|---|
Distribution | violin | one or more numeric variables for one or several groups |
Distribution | density | strong |
Distribution | boxplot | baz |
Distribution | histogram | baz |
Correlation | scatter plot | baz |
Correlation | connected scatter plot | baz |
Correlation | bubble plot | baz |
Correlation | heatmap | baz |
Correlation | 2D density plot | baz |
Correlation | Correlation matrix | baz |
Ranking | bar plot | |
Ranking | box plot | |
Ranking | parallel plot | |
Ranking | Lollipop plot | |
Ranking | word cloud | |
Ranking | spider plot | |
Part of a whole | stacked bar plot | |
Part of a whole | tree plot | |
Part of a whole | venn plot | |
Part of a whole | doughnut plot | |
Part of a whole | pie plot | |
Part of a whole | tree diagram | |
Evolution | line plot | |
Evolution | area plot | |
Evolution | stacked area plot | |
Evolution | parallel plot | |
Evolution | streamchart | |
Maps | map | |
Maps | choropleth map | |
Maps | connection map | |
Maps | bubble map | |
Flow | Chord diagram | |
Flow | Network chart | |
Flow | Sankey diagram | |
这个博客的搭建主要是基于github pages,使用jekyll模板。在线的流程有很多,我主要是参考了这篇文章。
简要的步骤如下:
参考博客(GitHub教程 学生认证——学生包申请)进行申请,因为用的是教育邮箱,所以不到两分钟就收到了通过邮件。
Hey XXX, we have some awesome news
We've upgraded you to a plan with unlimited free private repositories, which will be free for the next two years. After that, you'll get an email saying that your coupon is expiring. You can reapply for another coupon if you still have academic status. We don't have any collaboration limits, so any group projects you may encounter can be hosted via your account.
If you need help getting started with Git and GitHub, check out:
https://help.github.com/articles/good-resources-for-learning-git-and-github
We've also given you access to the Student Developer Pack, available at:
https://education.github.com/pack
If you have any questions, contact us:
https://education.github.com/contact
Spread the word: we love giving educational discounts to students, teachers, administrators, and researchers! Please send them to:
https://education.github.com
Have an Octotastic day!
- The GitHub Education Team
本地的文件目录大致如下:
gongjing@hekekedeiMac ~/Dropbox/Tsinghua-gongjing.github.io (git)-[master] % ll
total 52K
-rw-r--r-- 1 gongjing staff 156 Feb 5 21:55 404.html
-rw-r--r-- 1 gongjing staff 0 Feb 5 21:55 CNAME
-rw-r--r-- 1 gongjing staff 654 Feb 7 13:53 README.md
-rw-r--r-- 1 gongjing staff 852 Feb 7 22:25 _config.yml
drwxr-xr-x 12 gongjing staff 408 Feb 7 20:37 _includes
drwxr-xr-x 7 gongjing staff 238 Feb 8 10:48 _layouts
drwxr-xr-x 37 gongjing staff 1.3K Feb 9 10:37 _posts
drwxr-xr-x 19 gongjing staff 646 Feb 8 15:41 _site
-rw-r--r-- 1 gongjing staff 2.5K Feb 9 10:19 about.md
-rw-r--r-- 1 gongjing staff 579 Feb 5 21:55 archives.md
drwxr-xr-x 8 gongjing staff 272 Feb 6 20:04 assets
-rw-r--r-- 1 gongjing staff 743 Feb 5 21:55 atom.xml
-rw-r--r-- 1 gongjing staff 422 Feb 5 21:55 categories.md
drwxr-xr-x 4 gongjing staff 136 Feb 7 18:43 css
-rw-r--r-- 1 gongjing staff 1.1K Feb 5 21:55 faqs.md
-rw-r--r-- 1 gongjing staff 1.2K Feb 5 21:55 favicon.ico
drwxr-xr-x 3 gongjing staff 102 Feb 5 21:55 fonts
-rw-r--r-- 1 gongjing staff 945 Feb 6 11:11 index.html
drwxr-xr-x 6 gongjing staff 204 Feb 5 21:55 js
-rw-r--r-- 1 gongjing staff 679 Feb 5 21:55 links.md
drwxr-xr-x 8 gongjing staff 272 Feb 6 11:44 posts
-rw-r--r-- 1 gongjing staff 53 Feb 5 21:55 robots.txt
-rw-r--r-- 1 gongjing staff 3.9K Feb 5 21:55 tags.md
这里使用markdown文件来写post(放在_posts目录下面),github pages会基于jekyll自动生成网页的格式。_layouts文件夹定义了每个网页的基本格式,可以通过修改这里的html文件,调整网页的布局。
clone的这个模板没有评论部分的代码,看了一下原来的网站,使用的是gitment服务(评论需要github账号)。另一个用的比较多的是Disqus服务,于是自己添加代码搭建了一个。主要的参考是这里。
注意:
博客会有很多图片,使得传达信息更加直接,为了使自己的网站不臃肿,可以把图片都放在一些图床上,然后在post中放图片的链接,生成网页时会直接加载。目前正在申请七牛云的账号,免费账号10G存储(需要拿着身份证拍正反面照片,上传)。
更新:对于图片直接放在assets(比如:/Users/gongjing/Dropbox/Tsinghua-gongjing.github.io/assets)目录下面即可,然后在md文件写相对链接(因为jekyll会自动解析相对网站域名的路径, 可参考这里,PDF文件也可以直接放在这个目录下,在浏览器中直接打开或者下载),比如:
![bed](/assets/bed_file_format_example.jpeg)
参考这篇文章:PicGo+GitHub图床,让Markdown飞,可以实现截图、上传到github自己建立的repo下,然后生成markdown链接,提升了记录的效率。软件下载在这里:PicGo@github
设计一个图片后,上传到favicon生成,对应的icon文件,放在root目录下即可。(如果更换后,加载没有更新,需要清理一下网站的缓存)
目前实现的是在单个post(.md文件)中添加,需要在开头显式指定调用MathJax,因为其本身不提供解析。kramdown的math-block,参考github render math blocks。 比如在写.md文件时,先调用,效果参见post。
---
layout: post
category: "read"
title: "Think Stats: descriptive statistics?"
tags: [reading, statistics]
---
<!-- Load MathJax from cdnjs: the original cdn.mathjax.org host was shut down. -->
<script type="text/javascript" async
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
1. 均值(mean):值的总和除以值的数量;平均值(average):若干种可以用于描述样本的典型值或**集中趋势(central tendency)**的汇总统计量之一。注意根据样本的范围选择合适的描述量。
2. 方差:描述分散情况。
$$
\begin{align}\sigma^2 = \frac{1}{n}\sum_{i=1}^{n}(X_i-\mu)^2\end{align}
$$
<div>
<table>
<tr>
<th align="middle">Not smooth</th>
<th align="middle">Smooth</th>
</tr>
<tr>
<td><img src="https://i.stack.imgur.com/dSLtt.png"></td>
<td><img src="https://i.stack.imgur.com/olGAh.png"></td>
</tr>
</table>
</div>
在每个页面下方,有个链接,但是当链接含有中文时,显示出一长串的码,所以需要解析:
post页面地址:
https://tsinghua-gongjing.github.io/posts/模型评估与选择.html
显示:
If you link this blog, please refer to this page, thanks!
Post link:https://tsinghua-gongjing.github.io/posts/%E6%A8%A1%E5%9E%8B%E8%AF%84%E4%BC%B0%E4%B8%8E%E9%80%89%E6%8B%A9.html
在/_layouts/post.html
页面的代码部分:
<div style="margin:10px">
<br/>
<b>If you link this blog, please refer to this page, thanks!</b>
<br/>
<b>Post link:</b><a href="\\https://tsinghua-gongjing.github.io/posts/github_pages_based_blog.html" title="">https://tsinghua-gongjing.github.io/posts/github_pages_based_blog.html</a>
</div>
这是英国泰晤士报,发布的大学排名,所展示的评估指标的权重。