在写毕设OJ的时候需要完成一个代码查重的功能,很久以前就发现了这个代码查重工具。
编译的环境是
Deepin 20.1 社区版
Linux wangx-PC 5.4.70-amd64-desktop #1 SMP Wed Oct 14 15:24:23 CST 2020 x86_64 GNU/Linux
1. 首先下载源码包
2.修改Makefile
下面是我改好的makefile,你可以修改
# This file is part of the software similarity tester SIM.
# Written by Dick Grune, Vrije Universiteit, Amsterdam.
# $Id: Makefile,v 2.66 2015-04-29 18:18:21 dick Exp $
#
VERSION="\"2.89 of 2015-04-29\""
# E N T R Y P O I N T S
help:
@echo 'Entry points:'
@echo 'test: compile sim_c and run a simple test'
@echo ''
@echo 'binaries: create all binaries'
@echo 'exes: create executables in MSDOS'
@echo 'install: install all binaries'
@echo ''
@echo 'man: create sim.pdf'
@echo 'lint: lint sim sources'
@echo 'simsim: run sim_c on the sim sources'
@echo ''
@echo 'fresh: remove created files'
#
# When you modify any of the following macros, do 'make clean'
#
# System dependencies
# =============== including ../lib/sysidf.mk here
# This file is part of the auxiliary libraries.
# Written by Dick Grune, dick@dickgrune.com
# $Id: sysidf.mk,v 1.17 2014-07-28 09:18:13 dick Exp $
#
################################################################
# For UNIX-like systems
SYSTEM = UNIX
SUBSYSTEM = SOLARIS
# Locations
DIR = /home/wangx
BINDIR = /usr/bin
MAN1DIR = /usr/share/man/man1
# Commands
COPY = cp -p
EXE = #
LEX = flex
LN = ln
ZIP = zip -o
################################################################
# For MSDOS + MinGW
#SYSTEM = MSDOS
#SUBSYSTEM = MinGW
# Locations
#DIR = C:/BIN
#BINDIR = C:/BIN
#MAN1DIR = C:/BIN
# Commands (cp required, since xcopy cannot handle forward slashes)
#COPY = cp -p
#EXE = .exe
#LEX = flex
#LN = ln
#ZIP = zip -o
################################################################
# General, compiling:
CC = gcc -D$(SYSTEM) -D$(SUBSYSTEM)
LINT = lint -ansi -D$(SYSTEM) -D$(SUBSYSTEM)
LINTFLAGS = -xh
# General, manual:
GROFF = groff -man
GROFF = man2pdf
.SUFFIXES: .1 .3 .pdf
.1.pdf:
$(GROFF) $<
.3.pdf:
$(GROFF) $<
# =============== end of ../lib/sysidf.mk
# Compiling
MEMORY = -DMEMLEAK -DMEMCLOBBER
CFLAGS = -DVERSION=$(VERSION) $(MEMORY) -O4
LIBFLAGS = #
LINTFLAGS = $(MEMORY) -h# -X
LOADFLAGS = -s# # strip symbol table
LOADER = $(CC) $(LOADFLAGS)
# Debugging
CFLAGS += -DDEBUG
DEBUG_C = debug.c
DEBUG_O = debug.o
DEBUG_H = debug.h
# T E S T P A R A M E T E R S
# percentage test
TEST_LANG = c
TEST_OPT = -p
TEST_INP = *.l
# text test
TEST_LANG = text
TEST_OPT = -r 5
TEST_INP = test_seplet
# Rumen Stevanov test
TEST_LANG = text
TEST_OPT = -p
TEST_INP = Rumen_Stefanov/new/*.txt
# Kuhl test 1
TEST_LANG = c
TEST_OPT = -p
TEST_INP = Kuhl/simc1.c Kuhl/simc2.c
# Kuhl test 2
TEST_LANG = c
TEST_OPT = -p
TEST_INP = Kuhl/simc2.c Kuhl/simc1.c
# -i option test
TEST_LANG = c
TEST_OPT = -f -r 20 -R -i <option-i.inp
TEST_INP = #
# regular test
TEST_LANG = c
TEST_OPT = -r24
TEST_INP = pass3.c
# I N T R O D U C T I O N
# Each module (set of programs that together perform some function)
# has the following sets of files defined for it:
# _FLS all files of that module, for, e.g.,
# sharring, inventory, etc.
# _SRC the source files, from which other files derive
# _CFS the C-files, from which the object files derive
# _OBJ object files
# _GRB garbage files produced by compiling the module
#
# (This is a feeble attempt at software-engineering a Makefile.)
#
test: sim.res stream.res percentages.res # three simple tests
# B I N A R I E S
BINARIES = sim_c$(EXE) sim_java$(EXE) sim_pasc$(EXE) sim_m2$(EXE) \
sim_lisp$(EXE) sim_mira$(EXE) sim_text$(EXE)
binaries: $(BINARIES)
EXES = sim_c.exe sim_java.exe sim_pasc.exe sim_m2.exe \
sim_lisp.exe sim_mira.exe sim_text.exe
exes: $(EXES)
# A U X I L I A R Y M O D U L E S
# Common modules:
COM_CFS = token.c lex.c stream.c text.c tokenarray.c error.c $(DEBUG_C) \
ForEachFile.c fname.c Malloc.c any_int.c
COM_OBJ = token.o lex.o stream.o text.o tokenarray.o error.o $(DEBUG_O) \
ForEachFile.o fname.o Malloc.o any_int.o
COM_SRC = token.h lex.h stream.h text.h tokenarray.h error.h $(DEBUG_H) \
ForEachFile.h fname.h Malloc.h any_int.h \
lang.h language.h \
sortlist.spc sortlist.bdy system.par $(COM_CFS)
COM_FLS = $(COM_SRC)
# C files for the abstract modules:
ABS_CFS = lang.c language.c
# The idf module:
IDF_CFS = idf.c
IDF_OBJ = idf.o
IDF_SRC = idf.h $(IDF_CFS)
IDF_FLS = $(IDF_SRC)
# The runs package:
RUNS_CFS = runs.c percentages.c
RUNS_OBJ = runs.o percentages.o
RUNS_SRC = runs.h percentages.h aiso.spc aiso.bdy $(RUNS_CFS)
RUNS_FLS = $(RUNS_SRC)
# The main program:
MAIN_CFS = sim.c options.c newargs.c hash.c compare.c add_run.c \
pass1.c pass2.c pass3.c
MAIN_OBJ = sim.o options.o newargs.o hash.o compare.o add_run.o \
pass1.o pass2.o pass3.o
MAIN_SRC = sim.h options.h newargs.h hash.h compare.h add_run.h \
pass1.h pass2.h pass3.h \
debug.par settings.par $(MAIN_CFS)
MAIN_FLS = $(MAIN_SRC)
# The similarity tester without the language part:
SIM_CFS = $(COM_CFS) $(IDF_CFS) $(RUNS_CFS) $(MAIN_CFS)
SIM_OBJ = $(COM_OBJ) $(IDF_OBJ) $(RUNS_OBJ) $(MAIN_OBJ)
SIM_SRC = $(COM_SRC) $(IDF_SRC) $(RUNS_SRC) $(MAIN_SRC)
SIM_FLS = $(COM_FLS) $(IDF_FLS) $(RUNS_FLS) $(MAIN_FLS)
# L A N G U A G E S
# The algollike module:
ALG_CFS = algollike.c
ALG_OBJ = algollike.o
ALG_SRC = algollike.h $(ALG_CFS)
ALG_FLS = $(ALG_SRC)
# The C Language module: # C
CLANG_CFS = clang.c
CLANG_OBJ = clang.o
CLANG_SRC = clang.l
CLANG_FLS = $(CLANG_SRC)
clang.c: clang.l
$(LEX) -t clang.l >$@
SIM_C_CFS = $(SIM_CFS) $(ALG_CFS) $(CLANG_CFS)
SIM_C_OBJ = $(SIM_OBJ) $(ALG_OBJ) $(CLANG_OBJ)
sim_c$(EXE): $(SIM_C_OBJ)
$(LOADER) $(SIM_C_OBJ) -o $@
SIM_GRB += clang.c sim_c
$(BINDIR)/sim_c$(EXE): sim_c$(EXE)
$(COPY) sim_c$(EXE) $@
# The Java Language module: # Java
JAVALANG_CFS = javalang.c
JAVALANG_OBJ = javalang.o
JAVALANG_SRC = javalang.l
JAVALANG_FLS = $(JAVALANG_SRC)
javalang.c: javalang.l
$(LEX) -t javalang.l >$@
SIM_JAVA_CFS = $(SIM_CFS) $(ALG_CFS) $(JAVALANG_CFS)
SIM_JAVA_OBJ = $(SIM_OBJ) $(ALG_OBJ) $(JAVALANG_OBJ)
sim_java$(EXE): $(SIM_JAVA_OBJ)
$(LOADER) $(SIM_JAVA_OBJ) -o $@
SIM_GRB += javalang.c sim_java
$(BINDIR)/sim_java$(EXE): sim_java$(EXE)
$(COPY) sim_java$(EXE) $@
# The Pascal Language module: # Pascal
PASCLANG_CFS = pascallang.c
PASCLANG_OBJ = pascallang.o
PASCLANG_SRC = pascallang.l
PASCLANG_FLS = $(PASCLANG_SRC)
pascallang.c: pascallang.l
$(LEX) -t pascallang.l >pascallang.c
SIM_PASC_CFS = $(SIM_CFS) $(ALG_CFS) $(PASCLANG_CFS)
SIM_PASC_OBJ = $(SIM_OBJ) $(ALG_OBJ) $(PASCLANG_OBJ)
sim_pasc$(EXE): $(SIM_PASC_OBJ)
$(LOADER) $(SIM_PASC_OBJ) -o $@
SIM_GRB += pascallang.c sim_pasc
$(BINDIR)/sim_pasc$(EXE): sim_pasc$(EXE)
$(COPY) sim_pasc$(EXE) $@
# The Modula-2 Language module: # Modula-2
M2LANG_CFS = m2lang.c
M2LANG_OBJ = m2lang.o
M2LANG_SRC = m2lang.l
M2LANG_FLS = $(M2LANG_SRC)
m2lang.c: m2lang.l
$(LEX) -t m2lang.l >$@
SIM_M2_CFS = $(SIM_CFS) $(ALG_CFS) $(M2LANG_CFS)
SIM_M2_OBJ = $(SIM_OBJ) $(ALG_OBJ) $(M2LANG_OBJ)
sim_m2$(EXE): $(SIM_M2_OBJ)
$(LOADER) $(SIM_M2_OBJ) -o $@
SIM_GRB += m2lang.c sim_m2
$(BINDIR)/sim_m2$(EXE): sim_m2$(EXE)
$(COPY) sim_m2$(EXE) $@
# The Lisp Language module: # Lisp
LISPLANG_CFS = lisplang.c
LISPLANG_OBJ = lisplang.o
LISPLANG_SRC = lisplang.l
LISPLANG_FLS = $(LISPLANG_SRC)
lisplang.c: lisplang.l
$(LEX) -t lisplang.l >$@
SIM_LISP_CFS = $(SIM_CFS) $(ALG_CFS) $(LISPLANG_CFS)
SIM_LISP_OBJ = $(SIM_OBJ) $(ALG_OBJ) $(LISPLANG_OBJ)
sim_lisp$(EXE): $(SIM_LISP_OBJ)
$(LOADER) $(SIM_LISP_OBJ) -o $@
SIM_GRB += lisplang.c sim_lisp
$(BINDIR)/sim_lisp$(EXE): sim_lisp$(EXE)
$(COPY) sim_lisp$(EXE) $@
# The Miranda Language module: # Miranda
MIRALANG_CFS = miralang.c
MIRALANG_OBJ = miralang.o
MIRALANG_SRC = miralang.l
MIRALANG_FLS = $(MIRALANG_SRC)
miralang.c: miralang.l
$(LEX) -t miralang.l >$@
SIM_MIRA_CFS = $(SIM_CFS) $(ALG_CFS) $(MIRALANG_CFS)
SIM_MIRA_OBJ = $(SIM_OBJ) $(ALG_OBJ) $(MIRALANG_OBJ)
sim_mira$(EXE): $(SIM_MIRA_OBJ)
$(LOADER) $(SIM_MIRA_OBJ) -o $@
SIM_GRB += miralang.c sim_mira
$(BINDIR)/sim_mira$(EXE): sim_mira$(EXE)
$(COPY) sim_mira$(EXE) $@
# The Text module: # Text
TEXTLANG_CFS = textlang.c
TEXTLANG_OBJ = textlang.o
TEXTLANG_SRC = textlang.l
TEXTLANG_FLS = $(TEXTLANG_SRC)
textlang.c: textlang.l
$(LEX) -t textlang.l >$@
SIM_TEXT_CFS = $(SIM_CFS) $(TEXTLANG_CFS)
SIM_TEXT_OBJ = $(SIM_OBJ) $(TEXTLANG_OBJ)
sim_text$(EXE): $(SIM_TEXT_OBJ)
$(LOADER) $(SIM_TEXT_OBJ) -o $@
SIM_GRB += textlang.c sim_text
$(BINDIR)/sim_text$(EXE): sim_text$(EXE)
$(COPY) sim_text$(EXE) $@
# T E S T S
# Some simple tests:
.PHONY: sim.res percentages.res
sim.res: sim_$(TEST_LANG)$(EXE) # no TEST_INP required, for error tests
./sim_$(TEST_LANG)$(EXE) $(TEST_OPT) $(TEST_INP)
stream.res: sim_$(TEST_LANG)$(EXE)
./sim_$(TEST_LANG)$(EXE) -- $(TEST_OPT) $(TEST_INP) >$@
wc $@ $(TEST_INP)
percentages.res:sim_$(TEST_LANG)$(EXE)
./sim_$(TEST_LANG)$(EXE) -p $(TEST_OPT) $(TEST_INP)
TEST_GRB = stream.res
# More simple tests, using the C version only:
simsim: sim_c$(EXE) $(SIM_CFS) $(ALG_CFS)
./sim_c$(EXE) -fr 20 $(SIM_CFS) $(ALG_CFS)
# Lint
lint: $(SIM_SRC) $(ALG_SRC) $(ABS_CFS)
$(LINT) $(LINTFLAGS) $(SIM_CFS) $(ALG_CFS) $(ABS_CFS)
# O T H E R E N T R I E S
# Sets of files: general, modules, main programs, languages
CFS = $(SIM_CFS) $(ALG_CFS) \
$(CLANG_CFS) $(JAVALANG_CFS) $(PASCLANG_CFS) $(M2LANG_CFS) \
$(LISPLANG_CFS) $(MIRALANG_CFS) $(TEXTLANG_CFS)
OBJ = $(SIM_OBJ) $(ALG_OBJ) \
$(CLANG_OBJ) $(JAVALANG_OBJ) $(PASCLANG_OBJ) $(M2LANG_OBJ) \
$(LISPLANG_OBJ) $(MIRALANG_OBJ) $(TEXTLANG_OBJ)
SRC = $(SIM_SRC) $(ALG_SRC) \
$(CLANG_SRC) $(JAVALANG_SRC) $(PASCLANG_SRC) $(M2LANG_SRC) \
$(LISPLANG_SRC) $(MIRALANG_SRC) $(TEXTLANG_SRC)
FLS = $(SIM_FLS) $(ALG_FLS) \
$(CLANG_FLS) $(JAVALANG_FLS) $(PASCLANG_FLS) $(M2LANG_FLS) \
$(LISPLANG_FLS) $(MIRALANG_FLS) $(TEXTLANG_FLS) \
sysidf.mk sysidf.msdos sysidf.unix
DOC = README sim.1 sim.txt sim.html ChangeLog Answers TechnReport
# Documentation
man: sim.pdf
# Installation
install_all: install # just a synonym
install: $(MAN1DIR)/sim.1 \
$(BINDIR)/sim_c$(EXE) \
$(BINDIR)/sim_java$(EXE) \
$(BINDIR)/sim_pasc$(EXE) \
$(BINDIR)/sim_m2$(EXE) \
$(BINDIR)/sim_lisp$(EXE) \
$(BINDIR)/sim_mira$(EXE) \
$(BINDIR)/sim_text$(EXE)
$(MAN1DIR)/sim.1: sim.1
$(COPY) sim.1 $@
# Clean-up
.PHONY: clean fresh
clean:
-rm -f *.o
-rm -f $(SIM_GRB)
-rm -f $(TEST_GRB)
-rm -f $(DOC_GRB)
-rm -f a.out a.exe sim.txt core mon.out
fresh: clean
-rm -f *.exe
# D E P E N D E N C I E S
# DO NOT DELETE THIS LINE -- make depend depends on it.
ForEachFile.o: ForEachFile.c ForEachFile.h fname.h
Malloc.o: Malloc.c any_int.h Malloc.h
add_run.o: add_run.c sim.h debug.par text.h runs.h aiso.spc percentages.h \
Malloc.h options.h error.h add_run.h
algollike.o: algollike.c options.h error.h token.h algollike.h
any_int.o: any_int.c any_int.h
clang.o: clang.c options.h token.h language.h algollike.h idf.h lex.h \
lang.h
compare.o: compare.c sim.h text.h token.h tokenarray.h hash.h language.h \
options.h add_run.h compare.h debug.par
count_sim_dup.o: count_sim_dup.c
debug.o: debug.c debug.h
error.o: error.c sim.h error.h
fname.o: fname.c fname.h
hash.o: hash.c system.par debug.par sim.h text.h Malloc.h error.h \
any_int.h token.h language.h tokenarray.h options.h hash.h
idf.o: idf.c system.par token.h idf.h
javalang.o: javalang.c options.h token.h language.h algollike.h idf.h \
lex.h lang.h
lang.o: lang.c token.h language.h algollike.h idf.h lex.h lang.h
language.o: language.c token.h language.h
lex.o: lex.c lex.h
lisplang.o: lisplang.c token.h language.h algollike.h lex.h lang.h idf.h
m2lang.o: m2lang.c options.h token.h language.h algollike.h idf.h lex.h \
lang.h
miralang.o: miralang.c token.h language.h algollike.h lex.h lang.h idf.h
newargs.o: newargs.c sim.h ForEachFile.h fname.h Malloc.h error.h \
newargs.h
options.o: options.c options.h
pascallang.o: pascallang.c options.h token.h language.h algollike.h idf.h \
lex.h lang.h
pass1.o: pass1.c debug.par sim.h text.h token.h tokenarray.h lang.h \
error.h options.h pass1.h
pass2.o: pass2.c debug.par sim.h token.h text.h lang.h pass2.h \
sortlist.bdy
pass3.o: pass3.c system.par debug.par sim.h text.h token.h runs.h \
aiso.spc Malloc.h error.h options.h pass3.h percentages.h
percentages.o: percentages.c debug.par sim.h text.h runs.h aiso.spc \
options.h Malloc.h error.h percentages.h sortlist.bdy
runs.o: runs.c sim.h text.h runs.h aiso.spc debug.par aiso.bdy Malloc.h
sim.o: sim.c system.par settings.par sim.h options.h newargs.h token.h \
language.h error.h text.h runs.h aiso.spc hash.h compare.h pass1.h \
pass2.h pass3.h percentages.h stream.h lang.h Malloc.h any_int.h
stream.o: stream.c system.par sim.h options.h token.h lang.h stream.h
t.o: t.c
text.o: text.c debug.par sim.h token.h stream.h lang.h Malloc.h options.h \
error.h text.h
textlang.o: textlang.c sim.h token.h idf.h lex.h lang.h language.h
token.o: token.c token.h
tokenarray.o: tokenarray.c error.h Malloc.h token.h lang.h tokenarray.h
3. 编译
make test
make install