<!DOCTYPE html>
# Imports come first so that every alias used in this script (`sns`, `pd`,
# `plt`, `np`) is bound before its first use; the seaborn configuration below
# previously ran before anything had been imported.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from adjustText import adjust_text

# Global seaborn look: "ticks" style, then the "poster" context on top of it.
sns.set(style="ticks")
sns.set_context("poster")
# Load the tab-separated statistics table; the first column becomes the index.
f = '../data/HEK293T.duplex.RBP.stat.bed'
df = pd.read_csv(f, sep='\t', header=0, index_col=0)

# Label each row with an enrichment level:
#   'high' - pvalue <= 0.01, oddsratio >= 1.5 and 'pct(cover RRI) 1' >= 0.05
#   'weak' - pvalue <= 0.01 and oddsratio >= 1.5, but coverage below 0.05
#   'none' - everything else
color_category = []
for _, row in df.iterrows():
    coverage = float(row['pct(cover RRI) 1'])
    enriched = float(row['pvalue']) <= 0.01 and float(row['oddsratio']) >= 1.5
    if not enriched:
        level = 'none'
    elif coverage >= 0.05:
        level = 'high'
    else:
        level = 'weak'
    color_category.append(level)
df['enrich level'] = color_category
df.head()
# Scatter of 'log2(obs/exp)' (x) against 'pct(cover RRI) 1' (y), coloured by
# the 'enrich level' column.
fig, ax = plt.subplots(figsize=(10, 12))
df['size'] = 0.01  # constant column, so the `size=` mapping sees a single value
sns.scatterplot(
    data=df,
    x='log2(obs/exp)',
    y='pct(cover RRI) 1',
    hue='enrich level',
    size='size',
)

# Annotate every point whose enrichment level is not 'none' with its index
# label; the row filter is computed once and reused for x, y and the labels.
labelled = df[df['enrich level'] != 'none']
xs = labelled['log2(obs/exp)']
ys = labelled['pct(cover RRI) 1']
ss = labelled.index
texts = [plt.text(x, y, s) for x, y, s in zip(xs, ys, ss)]
# adjust_text(texts, only_move={'text': 'y'})
plt.ylabel('Percentage of interacting regions')
# Shape of df placed side by side with itself (same rows, columns doubled).
pd.concat([df, df], axis=1).shape
# Number of rows per enrichment level, as a one-column DataFrame.
df['enrich level'].value_counts().to_frame()
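# NOTE(review): the six lines below are not Python; presumably pasted cell
# output (label / count pairs). Kept verbatim as comments - confirm or remove.
# 1 198
# 2 2497
# 3 1274
# 4 506
# 5 217
# 6 349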
import random

from scipy import stats

# Small scratch list used by the quick checks below.
a = [1, 2, 3]
a.extend([4, 5])
a

# Seed both generators. np.random.seed only affects NumPy's global RNG, while
# random.sample draws from the stdlib `random` module, which keeps its own
# state; without random.seed the sample below changes from run to run.
np.random.seed(1234)
random.seed(1234)
sampled = random.sample(a, 2)
sampled

# Pearson correlation of the list with itself.
stats.pearsonr(a, a)