How do I protect Python code?

可以直接看第七项

1. Cython入门

完全摘抄自Cython 三分钟入门

python

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import math

def great_circle(lon1,lat1,lon2,lat2):
    """Return the great-circle distance in miles between two points.

    The points are given as longitude/latitude pairs in degrees. The
    distance is computed with the spherical law of cosines on a sphere
    of radius 3956 miles.
    """
    radius = 3956 #miles
    x = math.pi/180.0  # degrees -> radians conversion factor

    a = (90.0-lat1)*(x)  # colatitude of the first point, in radians
    b = (90.0-lat2)*(x)  # colatitude of the second point, in radians
    theta = (lon2-lon1)*(x)
    cos_c = ((math.cos(a)*math.cos(b)) +
             (math.sin(a)*math.sin(b)*math.cos(theta)))
    # Floating-point rounding can push the cosine marginally outside
    # [-1, 1] (e.g. for identical points), which would make math.acos
    # raise ValueError; clamp it back into the valid domain.
    c = math.acos(max(-1.0, min(1.0, cos_c)))
    return radius*c

###########
# 调用它 50 万次并测定它的时间
###########
import timeit

# Sample coordinates (degrees) and the number of timed calls.
lon1, lat1, lon2, lat2 = -72.345, 34.323, -61.823, 54.826
num = 500000

# Time the pure-Python implementation from module p1.
t = timeit.Timer("p1.great_circle(%f,%f,%f,%f)" % (lon1,lat1,lon2,lat2),
                 "import p1")
# A single pre-formatted argument prints the same text under both
# Python 2 and Python 3 (the bare print statement is Python 2 only).
print("Pure python function %s sec" % t.timeit(num))

#####
#约2.2秒
#####

Cython改写

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
######
# 定义一个Python函数,声明它的输入参数是浮点数类型,并为所有变量声明类型为C浮点数据类型
######
import math

def great_circle(float lon1,float lat1,float lon2,float lat2):
    """Return the great-circle distance in miles between two points.

    Arguments are longitudes/latitudes in degrees, typed as C floats.
    The trigonometry still goes through Python's math module here.
    """
    cdef float radius = 3956.0
    cdef float pi = 3.14159265
    cdef float x = pi/180.0
    cdef float a,b,theta,c
    cdef double cos_c

    a = (90.0-lat1)*(x)
    b = (90.0-lat2)*(x)
    theta = (lon2-lon1)*(x)
    cos_c = (math.cos(a)*math.cos(b)) + (math.sin(a)*math.sin(b)*math.cos(theta))
    # Rounding can leave the cosine marginally outside [-1, 1], which
    # would make math.acos raise ValueError; clamp it first.
    if cos_c > 1.0:
        cos_c = 1.0
    elif cos_c < -1.0:
        cos_c = -1.0
    c = math.acos(cos_c)
    return radius*c

将其转换为C代码再编译为Python扩展

1
2
3
4
5
6
7
8
# this will create a c1.c file - the C source code to build a python extension
cython c1.pyx

# Compile the object file
gcc -c -fPIC -I/usr/include/python2.5/ c1.c

# Link it into a shared library
gcc -shared c1.o -o c1.so

.so(或.dll)文件可以被Python import

1
2
3
4
    t = timeit.Timer("c1.great_circle(%f,%f,%f,%f)" % (lon1,lat1,lon2,lat2),
"import c1")
print "Cython function (still using python math)", t.timeit(num), "sec"
# 约1.8秒

使用C标准库替代 python 的 math 模块

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# 使用C标准库的math.h
# Single-precision trigonometric functions taken directly from the C library.
cdef extern from "math.h":
    float cosf(float theta)
    float sinf(float theta)
    float acosf(float theta)

def great_circle(float lon1,float lat1,float lon2,float lat2):
    """Return the great-circle distance in miles between two points.

    Arguments are longitudes/latitudes in degrees, typed as C floats.
    All trigonometry is done with the C functions declared above.
    """
    cdef float radius = 3956.0
    cdef float pi = 3.14159265
    cdef float x = pi/180.0
    cdef float a,b,theta,c,cos_c

    a = (90.0-lat1)*(x)
    b = (90.0-lat2)*(x)
    theta = (lon2-lon1)*(x)
    cos_c = (cosf(a)*cosf(b)) + (sinf(a)*sinf(b)*cosf(theta))
    # acosf returns NaN outside [-1, 1]; rounding can overshoot by a
    # tiny amount (e.g. identical points), so clamp the cosine first.
    if cos_c > 1.0:
        cos_c = 1.0
    elif cos_c < -1.0:
        cos_c = -1.0
    c = acosf(cos_c)
    return radius*c

##############
# Time the c2 extension, which uses the C math library.
t = timeit.Timer("c2.great_circle(%f,%f,%f,%f)" % (lon1,lat1,lon2,lat2),
                 "import c2")
# A single pre-formatted argument prints the same text under both
# Python 2 and Python 3 (the bare print statement is Python 2 only).
print("Cython function (using trig function from math.h) %s sec" % t.timeit(num))
# 0.4秒

用纯粹C函数替代python函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Single-precision trigonometric functions taken directly from the C library.
cdef extern from "math.h":
    float cosf(float theta)
    float sinf(float theta)
    float acosf(float theta)

cdef float _great_circle(float lon1,float lat1,float lon2,float lat2):
    """C-level great-circle distance in miles; inputs are degrees.

    Declared with cdef, so it is callable only from Cython/C code.
    """
    cdef float radius = 3956.0
    cdef float pi = 3.14159265
    cdef float x = pi/180.0
    cdef float a,b,theta,c,cos_c

    a = (90.0-lat1)*(x)
    b = (90.0-lat2)*(x)
    theta = (lon2-lon1)*(x)
    cos_c = (cosf(a)*cosf(b)) + (sinf(a)*sinf(b)*cosf(theta))
    # acosf returns NaN outside [-1, 1]; rounding can overshoot by a
    # tiny amount (e.g. identical points), so clamp the cosine first.
    if cos_c > 1.0:
        cos_c = 1.0
    elif cos_c < -1.0:
        cos_c = -1.0
    c = acosf(cos_c)
    return radius*c

def great_circle(float lon1,float lat1,float lon2,float lat2,int num):
    """Call the C-level _great_circle ``num`` times; return the last result.

    Running the loop in C removes the per-call Python overhead from the
    benchmark. Returns 0.0 when ``num`` is not positive.
    """
    cdef int i
    # Initialised so the return value is defined even if the loop never runs.
    cdef float x = 0.0
    # With a C-typed index, Cython compiles range() to a plain C loop; this
    # replaces the deprecated "for i from 0 <= i < num" form.
    for i in range(num):
        x = _great_circle(lon1,lat1,lon2,lat2)
    return x

# 0.2秒

2. 使用cython(虚拟机)

Protecting Python Sources With Cython——Distributing Python Programs As Compiled Binaries: How-To

2.1 安装Cython

1
2
pip install cython
pip3 install cython (for Python 3)

2.2 添加compile.py

在项目目录中加入如下脚本(compile.py).功能同 makefile:

1
2
3
4
5
6
7
8
9
10
11
12
13
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
# Each Extension pairs a module name with the .py source to be compiled.
ext_modules = [
    Extension("mymodule1", ["mymodule1.py"]),
    Extension("mymodule2", ["mymodule2.py"]),
    # ... all your modules that need be compiled ...
]
setup(
    name = 'My Program Name',
    # Route the build_ext command through Cython's implementation.
    cmdclass = {'build_ext': build_ext},
    ext_modules = ext_modules
)

2.3 添加main.py

设置入口
Make the entry point Python file for your application. You will import and launch all the compiled logic from there. An entry point file is required because Cython does not generate executable binaries by default (though it is capable of doing so), so you will need a dummy Python file, where you simply import all the compiled logic and run it. It can be as simple as:

1
2
from logic import main      # this comes from a compiled binary
main ()

2.4 编译compile.py

1
2
python compile.py build_ext --inplace
python3 compile.py build_ext --inplace(for Python 3)

编译后会在 .py 源文件旁生成对应的 .so 和 .c 文件

2.5 虚拟机VirtualBox、Vagrant使用

在Mac下安装使用Vagrant

  1. 安装 VirtualBox 和 Vagrant。
  2. 运行export VAGRANT_DEFAULT_PROVIDER=virtualbox
    (可以加入到bash脚本中~/.bash_profile 让运行更方便).
  3. 选择一个操作系统:
    https://app.vagrantup.com/boxes/search.
  4. 选择“How to use”选项,可以找到安装命令指南:
1
2
3
4
5
6
7
8
9
10
11
Vagrantfile
---
Vagrant.configure("2") do |config|
config.vm.box = "ubuntu/trusty64"
end


cmd
---
vagrant init ubuntu/trusty64
vagrant up
  5. 最后运行 vagrant ssh 以进入一个全新安装的Ubuntu系统控制台 (输入 exit 退出)。

  6. 进入 /vagrant 目录来找到项目文件,再运行步骤2.1和2.4

For projects with a short build/release cycle, multi-platform builds could be automated using a CI (Continuous Integration) service, like TravisCI, but that’s a story for another article.

2.6 虚拟机python环境问题

(1). How do I install a different Python version using apt-get?

1
2
3
4
5
6
7
sudo apt-get install libssl-dev openssl
wget https://www.python.org/ftp/python/3.5.0/Python-3.5.0.tgz
tar xzvf Python-3.5.0.tgz
cd Python-3.5.0
./configure
make
sudo make install

(2). network问题

python中安装包出现Retrying

pip install web.py -i http://pypi.douban.com/simple --trusted-host pypi.douban.com


 -i http://pypi.douban.com/simple --trusted-host pypi.douban.com

(3). TypeError: unsupported operand type(s) for -=: ‘Retry’ and ‘int’

Python PIP Install throws TypeError: unsupported operand type(s) for -=: ‘Retry’ and ‘int’

1
2
3
4
apt-get remove python-pip python3-pip
wget https://bootstrap.pypa.io/get-pip.py
python get-pip.py
python3 get-pip.py

(4). unknown type name ‘uint8_t’

removed /usr/local/include and then reinstalled llvm using homebrew.

https://github.com/cython/cython/issues/2009

(5). Vagrant SSH can’t access shared directory

the correct fix is to exit all vagrant ssh shells, run vagrant halt (not suspend) to make sure it’s OFF (not asleep), and revert any recent renames to parent directories. Then vagrant up and see if you can get into /vagrant! After that, if you want to rename your folders just vagrant halt, rename, and vagrant up.

https://stackoverflow.com/questions/18001193/vagrant-ssh-cant-access-shared-directory

(6). The program ‘pip’ is currently not installed.

To run ‘pip’ please ask your administrator to install the package ‘python-pip’

1
2
3
4
sudo apt-get install software-properties-common
sudo apt-add-repository universe
sudo apt-get update
sudo apt-get install python-pip

(7). error: command ‘x86_64-linux-gnu-gcc’ failed with exit status 1

1
2
3
4
5
6
7
8
9
10
11
12
13
for scrapy with Python 3, you'll need
---
sudo apt-get install python3 python-dev python3-dev \
build-essential libssl-dev libffi-dev \
libxml2-dev libxslt1-dev zlib1g-dev \
python-pip

with Python 2, you'll need
---
sudo apt-get install python-dev \
build-essential libssl-dev libffi-dev \
libxml2-dev libxslt1-dev zlib1g-dev \
python-pip

https://github.com/scrapy/scrapy/issues/2115

3. cython官方文档

3.1 使用distutils来创建Cython模块

  • 官方库distutils,使用安装脚本setup.py来构建,安装包。2000年停止开发

  • setuptools是替代distutils 的增强版工具集,包含easy_install 工具,使用ez_setup.py文件,支持egg格式的构建和安装,提供查找,下载安装构建,发布,管理等包管理功能。
    setuptools是包管理的核心模块。

hello.pyx文件:

1
2
def say_hello_to(name):
    """Print a greeting for ``name`` to standard output."""
    # Wrap the argument in a 1-tuple so a tuple-valued ``name`` is formatted
    # as a single value instead of being unpacked by the % operator.
    print("Hello %s!" % (name,))

setup.py文件:

1
2
3
4
5
from distutils.core import setup
from Cython.Build import cythonize

# cythonize() converts hello.pyx into the extension list handed to setup().
setup(name='Hello world app',
      ext_modules=cythonize("hello.pyx"))

To build, run python setup.py build_ext --inplace. Then simply start a Python session and do from hello import say_hello_to and use the imported function as you see fit.

20190301161706-image.png

20190301161706-image.png

3.2 使用Jupyter notebook

使用pip安装Jupyter notebook:

1
2
(venv)$ pip install jupyter
(venv)$ jupyter notebook

To enable support for Cython compilation, install Cython as described in the installation guide and load the Cython extension from within the Jupyter notebook:

1
%load_ext Cython

Then, prefix a cell with the %%cython marker to compile it:

1
2
3
4
5
6
%%cython

# Accumulate 0..9 into a C int and print the total.
cdef int a = 0
for i in range(10):
    a += i
print(a)

You can show Cython’s code analysis by passing the --annotate option:

1
2
3
4
5
6
%%cython --annotate

# Accumulate 0..9 into a C int and print the total.
cdef int a = 0
for i in range(10):
    a += i
print(a)

20190301161603-image.png

20190301161603-image.png


4. 实际项目中cython使用

进入project

20190305112316-image.png

20190305112316-image.png

假设要编译fee_test_NA.py文件为一个cython module。

  1. 创建cython_setup.py
1
2
3
4
5
6
7
8
9
10
11
12
from distutils.core import setup
from Cython.Build import cythonize
from distutils.extension import Extension

# Compile classify/fee_test_NA.py into an extension module named "fee_test_NA".
extensions = [Extension("fee_test_NA", ["classify/fee_test_NA.py"])]


setup(
    name='fee_test_NA app',
    # ext_modules=cythonize("classify/fee_test_NA.py")
    ext_modules=cythonize(extensions),
)
  1. 运行 python cython_setup.py build_ext --inplace

    20190305121109-image.png

    20190305121109-image.png

  2. 在代码中引用 import fee_test_NA

5. 实际项目中wheel使用

Packaging Python Projects

  1. 把创建的.so文件放到一个想要打包的文件夹里。
  2. 新建MANIFEST.in文件。
    目的是打包.so文件
1
recursive-include fasttexttestdemo *.so
  1. 新建setup.py文件

How do you add additional files to a wheel?在wheel中加资源文件

Package only binary compiled .so files of a python library compiled with Cython打包二进制so文件

How can I make a Python Wheel from an existing native library?从原始库中打包

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from setuptools import setup, Distribution


class BinaryDistribution(Distribution):
    """Distribution that always reports containing extension modules.

    NOTE(review): presumably used so the wheel is tagged as
    platform-specific, since the package ships prebuilt .so files —
    confirm against the wheel/setuptools documentation.
    """

    def has_ext_modules(self):
        """Return True unconditionally."""
        return True


setup(
    name='fee_test_na_app',
    version='0.0.1',
    description='fee_test_NA Library',
    packages=['feetestna_app'],
    # Include the shared objects located in the package directory.
    package_data={
        'feetestna_app': ['*.so'],
    },
    # Use the Distribution subclass defined earlier in this script.
    distclass=BinaryDistribution)
20190305130032-image.png

20190305130032-image.png

  1. 打包并安装到python库

https://sparkydogx.github.io/2017/08/29/python-00/

1
2
3
查找pip包路径
>>> import site
>>> site.getsitepackages()

  • egg文件: 压缩文件,Python工程打包发布用。
  • wheel文件: wheel是一种制作分发格式,由PEP427定义。wheel志在取代egg格式。pip支持wheel。
1
2
python setup.py bdist_wheel
pip install dist/fee_test_NA_APP-0.0.1-cp35-cp35m-macosx_10_14_x86_64.whl

创建Pure-Python wheel

How to create a Pure-Python wheel

How to build and distribute a Python/Cython package that depends on third party libFoo.so打包通用包

1
2
3
4
5
6
  --universal       make a universal wheel (default: false)

----
#add a setup.cfg file next to your setup.py that takes care of this:
[bdist_wheel]
universal = 1
20190305130707-image.png

20190305130707-image.png

20190305130542-image.png

20190305130542-image.png

20190305130743-image.png

20190305130743-image.png

  1. 测试
1
2
from feetestna_app.fee_test_NA import test
test("/Users/wangzulong/Desktop/results.txt")
20190305133425-image.png

20190305133425-image.png

6. 多层代码打包处理

Using Cython to protect a Python codebase

python setup.py build_ext
python setup.py bdist_wheel
1
2
3
4
5
6
7
8
9
10
src
├── mypkg
│ ├── bar.py
│ ├── foo.py
│ └── __init__.py
├── mypkg2
│ ├── bar.py
│ ├── foo.py
│ └── __init__.py
└── setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
###
# setup.py
###

from setuptools import setup
from setuptools.extension import Extension

from Cython.Build import cythonize
from Cython.Distutils import build_ext

setup(
    name="mypkg",
    ext_modules=cythonize(
        [
            # One wildcard Extension per package: every .py file in the
            # package directory is matched by the glob.
            Extension("mypkg.*", ["mypkg/*.py"]),
            Extension("mypkg2.*", ["mypkg2/*.py"])
        ],
        # Directory passed to cythonize as its build_dir.
        build_dir="build",
        compiler_directives=dict(
            always_allow_keywords=True
        )),
    cmdclass=dict(
        build_ext=build_ext
    ),
    packages=["mypkg", "mypkg2"]
)

always_allow_keywords 指令通过禁用对有大量参数的函数的关键字参数优化来保持flask视图函数工作正常。
ext_package使编译代码保存在相同位置

从资源树复制__init__.py 文件
修改build_ext类:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# ...

from pathlib import Path
import shutil

# ...

class MyBuildExt(build_ext):
    """build_ext command that also copies package entry files as plain source.

    After the regular extension build, the ``__init__.py`` and
    ``__main__.py`` files of ``mypkg`` and ``mypkg2`` are copied from the
    source tree into the build output, when they exist.
    """

    def run(self):
        """Build the extensions, then copy the plain-Python package files."""
        build_ext.run(self)

        build_dir = Path(self.build_lib)
        root_dir = Path(__file__).parent

        # In-place builds put their output next to the sources.
        target_dir = build_dir if not self.inplace else root_dir

        for filename in ('__init__.py', '__main__.py'):
            for package in ('mypkg', 'mypkg2'):
                self._copy_source_file(
                    Path(package) / filename, root_dir, target_dir)

    # Deliberately NOT named ``copy_file``: that would shadow the inherited
    # distutils ``Command.copy_file(infile, outfile, ...)`` with an
    # incompatible signature and break base-class code that calls it.
    def _copy_source_file(self, path, source_dir, destination_dir):
        """Copy ``source_dir / path`` to ``destination_dir / path`` if present."""
        source = source_dir / path
        destination = destination_dir / path
        if not source.exists():
            return
        # With --inplace the target is the source tree itself; copying a file
        # onto itself makes shutil.copyfile raise SameFileError.
        if destination.exists() and source.samefile(destination):
            return
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(str(source), str(destination))

setup(
# ...
cmdclass=dict(
build_ext=MyBuildExt
),
# ...
)

删除packages参数,使wheel包内不包含源代码

1
2
3
4
setup(
# ...
packages=[]
)

pip install dist/*.whl

7. docker一键打包

dockerfile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Use the official Python runtime as the parent image
FROM python:3.5.2

# Copy the shell scripts into /app/CNOOC inside the image
COPY *.sh /app/CNOOC/

# Set the working directory to /app/CNOOC
WORKDIR /app/CNOOC/

# Upgrade pip
RUN pip install --upgrade pip

# Upgrade setuptools
RUN pip install --upgrade setuptools

# Install Cython
RUN pip install Cython

# Install wheel
RUN pip install wheel

# Make the copied scripts executable
RUN chmod +x *.sh

# Run package.sh with bash when the container starts
ENTRYPOINT [ "bash", "package.sh" ]

package.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
#!/bin/bash
# Build the project wheel into dist/ and remove intermediate build output.

# Remove output left over from a previous build.
rm -rf build/
rm -rf *.egg-info/
rm -rf dist/

# Compile the modules and package them as a wheel.
python setup.py bdist_wheel
status=$?

# Remove intermediate folders; only dist/ is kept.
rm -rf build/
rm -rf *.egg-info/

# Report the build result instead of the exit status of the cleanup, so a
# failed build is visible to the caller (e.g. the container's exit code).
exit "$status"

setup.py
注意在源代码里如果有内部文件引用,则需要增加testna.前缀,否则定位不到包内文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from setuptools import setup
from setuptools.extension import Extension

from Cython.Build import cythonize
from Cython.Distutils import build_ext

from pathlib import Path
import shutil


class MyBuildExt(build_ext):
    """build_ext command that also copies the packages' ``__init__.py`` files.

    After the regular extension build, the ``__init__.py`` of each
    ``testna`` package directory listed in ``run`` is copied from the
    source tree into the build output, when it exists.
    """

    def run(self):
        """Build the extensions, then copy the ``__init__.py`` files."""
        build_ext.run(self)

        build_dir = Path(self.build_lib)
        root_dir = Path(__file__).parent

        # In-place builds put their output next to the sources.
        target_dir = build_dir if not self.inplace else root_dir

        print("build_dir: " + str(build_dir))
        print("root_dir: " + str(root_dir))
        print("target_dir: " + str(target_dir))

        package_dirs = (
            'testna',
            'testna/filter/',
            'testna/filter/input/',
            'testna/filter/output/',
            'testna/security/',
        )
        for package_dir in package_dirs:
            self._copy_source_file(
                Path(package_dir) / '__init__.py', root_dir, target_dir)

    # Deliberately NOT named ``copy_file``: that would shadow the inherited
    # distutils ``Command.copy_file(infile, outfile, ...)`` with an
    # incompatible signature and break base-class code that calls it.
    def _copy_source_file(self, path, source_dir, destination_dir):
        """Copy ``source_dir / path`` to ``destination_dir / path`` if present."""
        source = source_dir / path
        destination = destination_dir / path
        print("path: " + str(path))
        if not source.exists():
            return
        print("source_dir / path: " + str(source))
        print("destination_dir / path: " + str(destination))
        # With --inplace the target is the source tree itself; copying a file
        # onto itself makes shutil.copyfile raise SameFileError.
        if destination.exists() and source.samefile(destination):
            return
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(str(source), str(destination))


setup(
    name="cnooc-testservice",
    version="1.0.0",
    description="CNOOC Service",
    license="IBM limited",
    ext_modules=cythonize(
        [
            # One wildcard Extension per package directory to compile.
            Extension("testna.*", ["testna/*.py"]),
            Extension("testna.filter.input.*", ["testna/filter/input/*.py"]),
            Extension("testna.filter.output.*", ["testna/filter/output/*.py"]),
            Extension("testna.security.*", ["testna/security/*.py"])
        ],
        # Directory passed to cythonize as its build_dir.
        build_dir="build",
        compiler_directives=dict(always_allow_keywords=True)),
    # Custom build command defined earlier in this script.
    cmdclass=dict(build_ext=MyBuildExt),
    # No pure-Python packages are listed here.
    packages=[],
    install_requires=[],
    # Console command "cnooc-testna" mapped to main() in testna.TestApp.
    entry_points={'console_scripts': ['cnooc-testna = testna.TestApp:main']})

8. 参考资料整理

Nuitka:Python 打包工具

cython写的python包,带so/带dll的包打包到pypi上,上传

Python打包分发工具setuptools

Cython 基本用法

比Python快100倍,利用Cython实现高速NLP项目