summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
l---------README.md4
-rw-r--r--Smarker/assessments.py101
-rw-r--r--Smarker/requirements.txt3
-rw-r--r--docs/source/_static/QuickStart/simple_assessment.yml12
-rw-r--r--docs/source/_static/QuickStart/simple_submission_1/euclid.py22
-rw-r--r--docs/source/_static/QuickStart/simple_submission_2/euclid.py10
-rw-r--r--docs/source/_static/QuickStart/simple_submission_3/euclid.py11
-rw-r--r--docs/source/_static/QuickStart/simple_submission_4/euclid.py16
-rw-r--r--docs/source/_static/readme_matrix.pngbin0 -> 17528 bytes
-rw-r--r--docs/source/_static/report.txt9
-rw-r--r--docs/source/_static/simple.json60
-rw-r--r--docs/source/_static/simple.txt89
-rw-r--r--docs/source/assessments.rst24
-rw-r--r--docs/source/docker.rst8
-rw-r--r--docs/source/index.rst6
-rw-r--r--docs/source/quickstart.rst97
-rw-r--r--docs/source/readme.md4
-rw-r--r--docs/source/reflect.rst23
19 files changed, 478 insertions, 22 deletions
diff --git a/.gitignore b/.gitignore
index 78a3197..db2ce2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ out/
*.zip
smarker.conf
*.aux
+*.pickle
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/README.md b/README.md
index 2914835..3b61499 120000
--- a/README.md
+++ b/README.md
@@ -32,3 +32,7 @@ File with an exception
![pytest](https://smarker.eda.gay/_static/readme_pytest.png)
Using pytest
+
+![matrix](https://smarker.eda.gay/_static/readme_matrix.png)
+
+Plagiarism and collusion detection matrix
diff --git a/Smarker/assessments.py b/Smarker/assessments.py
index b8eb6f0..cdcdcad 100644
--- a/Smarker/assessments.py
+++ b/Smarker/assessments.py
@@ -1,8 +1,12 @@
from dataclasses import dataclass
+from matplotlib import pyplot as plt
+import numpy as np
import misc_classes
import configparser
import jinja_helpers
import pycode_similar
+import pandas as pd
+import pickle
import subprocess
import operator
import database
@@ -15,6 +19,11 @@ import re
@dataclass
class SimilarityMetric:
+ """Abstract class for getting a metric of similarity between two python objects.
+ By default it uses pycode_similar as a metric, but this can be changed by overriding
+ ``get_similarity()``. There is also the additional attribute ``details`` for getting
+ a breakdown of similarity.
+ """
code_text_1:str
code_text_2:str
id_1:int
@@ -37,10 +46,60 @@ class SimilarityMetric:
self.details += line.decode()
def get_similarity(self):
+ """Gets the similarity between the two codes.
+
+ Returns:
+ float: A percentage similarity metric
+ """
return float(re.findall(r"\d+\.\d+\s", self.details)[0])
+def visualise_matrix(dataframe:pd.DataFrame, file_name):
+ """Visualize and draw a similarity matrix. Simply shows the figure,
+ therefore this doesn't work in docker.
+
+ Args:
+ dataframe (pandas.DataFrame): Pandas dataframe representing the similarity
+ file_name (str): The file name that corresponds to the dataframe. Used as the title
+ """
+ print(file_name)
+ print(dataframe)
+
+ values = dataframe.values
+
+ fig, ax = plt.subplots()
+ ax.matshow(values, alpha = 0.3, cmap = plt.cm.Reds)
+
+ # axes labels
+ xaxis = np.arange(len(dataframe.columns))
+ ax.set_xticks(xaxis)
+ ax.set_yticks(xaxis)
+ ax.set_xticklabels(dataframe.columns)
+ ax.set_yticklabels(dataframe.index)
+
+ # labelling each point
+ for i in range(values.shape[0]):
+ for j in range(values.shape[1]):
+ if i == j:
+ ax.text(x = j, y = i, s = "N/A", va = 'center', ha = 'center')
+ else:
+ ax.text(x = j, y = i, s = values[i, j], va = 'center', ha = 'center')
+
+ plt.title(file_name)
+ plt.show()
+
-def generate_plagarism_report(assessment_name, db):
+def generate_plagarism_report(assessment_name, db:database.SmarkerDatabase):
+ """Generates a plagiarism report for the given ``assessment_name``. Only
+ fetches submissions with present files and without any exceptions.
+
+ Args:
+ assessment_name (str): The name of the assessment to fetch submissions from
+ db (database.SmarkerDatabase): An open database object is required
+
+ Returns:
+ dict: dict of ``pandas.core.frame.DataFrame`` objects indexed by the required file name
+ """
+ # get submissions with files and no exception
required_files = db.get_assessments_required_files(assessment_name)
submission_ids_to_get = set()
assessments = db.get_submissions(assessment_name)
@@ -56,27 +115,31 @@ def generate_plagarism_report(assessment_name, db):
un_added_student_nos.remove(id_)
+ # get similarity matrix
+ report = {}
codes = db.get_submission_codes(submission_ids_to_get)
for file_name, submissions in codes.items():
+ d = {}
+ d_details = {}
with tempfile.TemporaryDirectory() as td:
- print(file_name, len(submissions))
for student_id, code in submissions:
- with open(os.path.join(td, "%i.py" % student_id), "w") as f:
- f.write(code)
-
- cmd = ["pycode_similar"] + [os.path.join(td, f) for f in os.listdir(td)]
- print(" ".join(cmd))
- proc = subprocess.Popen(cmd, stdout = subprocess.PIPE)
- stdout = ""
- while True:
- line = proc.stdout.readline()
- if not line:
- break
- stdout += line.decode()
-
- print(stdout)
- input("skfhsk")
-
+ d[student_id] = []
+ d_details[student_id] = []
+ for student_id_2, code_2 in submissions:
+ sm = SimilarityMetric(code, code_2, student_id, student_id_2)
+ # print("%i and %i = %.3f" % (student_id, student_id_2, SimilarityMetric(code, code_2, student_id, student_id_2).get_similarity()))
+ d[student_id].append(sm.get_similarity())
+ d_details[student_id].append(sm)
+ index = [i[0] for i in submissions]
+ visualise_matrix(pd.DataFrame(d, index = index), file_name)
+ report[file_name] = pd.DataFrame(d_details, index = index)
+
+ out_path = os.path.realpath("plagarism_report_details.pickle")
+ with open(out_path, "wb") as f:
+ pickle.dump(report, f)
+ print("Written report to %s" % out_path)
+
+ return report
def getparser():
config = configparser.ConfigParser()
@@ -116,7 +179,7 @@ def getparser():
"-s", "--create_student",
action = misc_classes.EnvDefault,
envvar = "create_student",
- help = "Add a student in the form e.g. 123456789,Eden,Attenborough,E.Attenborough@uea.ac.uk",
+ help = "Add a student in the form e.g. 123456789,Eden Attenborough,E.Attenborough@uea.ac.uk",
required = False
)
parser.add_argument(
diff --git a/Smarker/requirements.txt b/Smarker/requirements.txt
index a8fef17..3be9c36 100644
--- a/Smarker/requirements.txt
+++ b/Smarker/requirements.txt
@@ -10,3 +10,6 @@ pdfkit
lxml
pymysql
pycode_similar
+pandas
+matplotlib
+numpy
diff --git a/docs/source/_static/QuickStart/simple_assessment.yml b/docs/source/_static/QuickStart/simple_assessment.yml
new file mode 100644
index 0000000..414f00b
--- /dev/null
+++ b/docs/source/_static/QuickStart/simple_assessment.yml
@@ -0,0 +1,12 @@
+name: simple_assessment
+files:
+ - euclid.py:
+ functions:
+ - gcd(2)
+ tests:
+ - |
+ assert euclid.gcd(8,12) == 4
+ run:
+ - python euclid.py:
+ regexes:
+ - ^4
diff --git a/docs/source/_static/QuickStart/simple_submission_1/euclid.py b/docs/source/_static/QuickStart/simple_submission_1/euclid.py
new file mode 100644
index 0000000..f72707a
--- /dev/null
+++ b/docs/source/_static/QuickStart/simple_submission_1/euclid.py
@@ -0,0 +1,22 @@
+# the newest!
+# assessment 1
+
+def gcd(m,n) -> int:
+ """Calculates the greatest common denominator between two numbers.
+
+ Args:
+ x (int): Number One
+ y (int): Number Two
+
+ Returns:
+ int: The GCD of the two numbers
+ """
+ if m< n:
+ (m,n) = (n,m)
+ if(m%n) == 0:
+ return n
+ else:
+ return (gcd(n, m % n)) # recursion taking place
+
+# gcd
+print(gcd(8,12))
diff --git a/docs/source/_static/QuickStart/simple_submission_2/euclid.py b/docs/source/_static/QuickStart/simple_submission_2/euclid.py
new file mode 100644
index 0000000..0819bc5
--- /dev/null
+++ b/docs/source/_static/QuickStart/simple_submission_2/euclid.py
@@ -0,0 +1,10 @@
+def gcd(m,n):
+ if m< n:
+ (m,n) = (n,m)
+ if(m%n) == 0:
+ return n
+ else:
+ return (gcd(n, m % n)) # recursion taking place
+
+# calling function with parameters and printing it out
+print(gcd(8,12))
diff --git a/docs/source/_static/QuickStart/simple_submission_3/euclid.py b/docs/source/_static/QuickStart/simple_submission_3/euclid.py
new file mode 100644
index 0000000..73e7d9c
--- /dev/null
+++ b/docs/source/_static/QuickStart/simple_submission_3/euclid.py
@@ -0,0 +1,11 @@
+def gcd(p,q):
+ """Docstring gcd"""
+ if p < q:
+ (p,q) = (q,p)
+ if(p%q) == 0:
+ return q
+ else:
+ return (gcd(q, p % q)) # recursion taking place
+
+# calling function with parameters and printing it out
+print(gcd(8,12))
diff --git a/docs/source/_static/QuickStart/simple_submission_4/euclid.py b/docs/source/_static/QuickStart/simple_submission_4/euclid.py
new file mode 100644
index 0000000..064d1e5
--- /dev/null
+++ b/docs/source/_static/QuickStart/simple_submission_4/euclid.py
@@ -0,0 +1,16 @@
+# assessment A
+# student id: 4
+
+def gcd(x,y):
+ if x > y:
+ small = y
+ else:
+ small = x
+ for i in range(1, small+1):
+ if((x % i == 0) and (y % i == 0)):
+ g = i
+
+ return g
+
+# calling function with parameters and printing it out
+print(gcd(8,12))
diff --git a/docs/source/_static/readme_matrix.png b/docs/source/_static/readme_matrix.png
new file mode 100644
index 0000000..e91358b
--- /dev/null
+++ b/docs/source/_static/readme_matrix.png
Binary files differ
diff --git a/docs/source/_static/report.txt b/docs/source/_static/report.txt
new file mode 100644
index 0000000..b78e20b
--- /dev/null
+++ b/docs/source/_static/report.txt
@@ -0,0 +1,9 @@
+euclid.py
+ 2 ... 1
+2 100.00 ... 94.74
+3 100.00 ... 94.74
+4 63.16 ... 57.89
+1 94.74 ... 100.00
+
+[4 rows x 4 columns]
+Written report to /Smarker/plagarism_report_details.pickle
diff --git a/docs/source/_static/simple.json b/docs/source/_static/simple.json
new file mode 100644
index 0000000..40accc7
--- /dev/null
+++ b/docs/source/_static/simple.json
@@ -0,0 +1,60 @@
+{
+ "files": [
+ {
+ "euclid.py": {
+ "functions": [
+ {
+ "gcd(2)": {
+ "present": true,
+ "documentation": {
+ "comments": "None",
+ "doc": "Docstring gcd"
+ },
+ "arguments": "(p, q)",
+ "minimum_arguments": 2,
+ "source_code": "def gcd(p,q):\n \"\"\"Docstring gcd\"\"\"\n if p < q:\n (p,q) = (q,p)\n if(p%q) == 0:\n return q\n else:\n return (gcd(q, p % q)) # recursion taking place"
+ }
+ }
+ ],
+ "run": [
+ {
+ "python euclid.py": {
+ "regexes": {
+ "^4": [
+ "4"
+ ]
+ },
+ "full_output": "4\n"
+ }
+ }
+ ],
+ "tests": [
+ "assert euclid.gcd(8,12) == 4\n"
+ ],
+ "present": true,
+ "has_exception": false,
+ "documentation": {
+ "comments": "None",
+ "doc": "None"
+ }
+ }
+ }
+ ],
+ "name": "simple_assessment",
+ "student_no": "123456790",
+ "test_results": {
+ "pytest_report": "============================= test session starts ==============================\nplatform linux -- Python 3.10.4, pytest-7.1.1, pluggy-1.0.0 -- /usr/bin/python3\ncachedir: .pytest_cache\nrootdir: /tmp/tmpjzy020i4/simple_submission_3\ncollecting ... collected 1 item\n\n../../../../../../tmp/tmpjzy020i4/simple_submission_3/test_euclid.py::test_1 PASSED [100%]\n\n--------------- generated xml file: /tmp/tmpyu0qypji/report.xml ----------------\n============================== 1 passed in 0.01s ===============================\n",
+ "junitxml": "<?xml version=\"1.0\" encoding=\"utf-8\"?><testsuites><testsuite name=\"pytest\" errors=\"0\" failures=\"0\" skipped=\"0\" tests=\"1\" time=\"0.019\" timestamp=\"2022-05-01T15:03:57.143881\" hostname=\"thonkpad2\"><testcase classname=\"test_euclid\" name=\"test_1\" time=\"0.001\" /></testsuite></testsuites>",
+ "meta": {
+ "name": "pytest",
+ "errors": "0",
+ "failures": "0",
+ "skipped": "0",
+ "tests": "1",
+ "time": "0.019",
+ "timestamp": "2022-05-01T15:03:57.143881",
+ "hostname": "thonkpad2"
+ }
+ },
+ "class_tree": {}
+}
diff --git a/docs/source/_static/simple.txt b/docs/source/_static/simple.txt
new file mode 100644
index 0000000..b6dcc16
--- /dev/null
+++ b/docs/source/_static/simple.txt
@@ -0,0 +1,89 @@
+============================= test session starts ==============================
+platform linux -- Python 3.10.4, pytest-7.1.2, pluggy-1.0.0 -- /usr/bin/python3
+cachedir: .pytest_cache
+rootdir: /tmp/tmp398_c3x6/simple_submission_1
+collecting ... collected 1 item
+
+../tmp/tmp398_c3x6/simple_submission_1/test_euclid.py::test_1 PASSED [100%]
+
+--------------- generated xml file: /tmp/tmpceag5_nn/report.xml ----------------
+============================== 1 passed in 0.01s ===============================
+4
+=== simple_assessment - Student ID: 1 Automatic marking report ===
+Report generated at 2022-05-01 15:49:15.701124
+
+== Class Tree: ==
+
+{}
+
+
+== File Analysis ==
+
+ = euclid.py =
+ Documentation:
+ 28 characters long
+ Comments:
+ ```
+ # the newest!
+ # assessment 1
+ ```
+ Docstring:
+ *** No docstring present ***
+ Functions:
+ gcd(2):
+ Arguments:
+ (m, n) -> int
+ Enough? YES
+ Documentation:
+ 164 characters long
+ Comments:
+ *** No comments present ***
+ Docstring:
+ ```
+ Calculates the greatest common denominator between two numbers.
+
+ Args:
+ x (int): Number One
+ y (int): Number Two
+
+ Returns:
+ int: The GCD of the two numbers
+ ```
+ Source:
+ 15 lines (356 characters)
+ Code:
+ ```
+ def gcd(m,n) -> int:
+ """Calculates the greatest common denominator between two numbers.
+
+ Args:
+ x (int): Number One
+ y (int): Number Two
+
+ Returns:
+ int: The GCD of the two numbers
+ """
+ if m< n:
+ (m,n) = (n,m)
+ if(m%n) == 0:
+ return n
+ else:
+ return (gcd(n, m % n)) # recursion taking place
+ ```
+ Runtime Analysis:
+ Command `python euclid.py`:
+ Monitor:
+ stdout
+ Regexes:
+ `^4`:
+ Found occurrences: 1
+ Occurrences list:
+ 4
+ Full runtime output:
+ ```
+ 4
+
+ ```
+
+
+
diff --git a/docs/source/assessments.rst b/docs/source/assessments.rst
new file mode 100644
index 0000000..a8d7311
--- /dev/null
+++ b/docs/source/assessments.rst
@@ -0,0 +1,24 @@
+.. _assessments:
+
+``assessments.py``
+==================
+
+``assessments.py`` contains many useful arguments for interacting with the database:
+
+.. argparse::
+ :module: assessments
+ :func: getparser
+ :prog: python Smarker/assessments.py
+
+Classes
+*******
+
+.. autoclass:: assessments.SimilarityMetric
+ :members:
+
+Functions
+*********
+
+.. autofunction:: assessments.visualise_matrix
+
+.. autofunction:: assessments.generate_plagarism_report \ No newline at end of file
diff --git a/docs/source/docker.rst b/docs/source/docker.rst
index 7c3237a..232c7f4 100644
--- a/docs/source/docker.rst
+++ b/docs/source/docker.rst
@@ -41,4 +41,10 @@ To list assessments in the database using docker:
.. code-block:: bash
- sudo docker run -it --entrypoint python --rm smarker assessments.py --list yes \ No newline at end of file
+ sudo docker run -it --entrypoint python --rm smarker assessments.py --list yes
+
+.. code-block:: bash
+
+ touch out/report.pickle && sudo docker run -v "$(pwd)/out/report.pickle":/Smarker/plagarism_report_details.pickle -it --entrypoint python --rm smarker assessments.py --plagarism_report example
+
+If a file doesn't exist before it's passed through as a volume in docker, it will be created automatically as a *directory*. This causes issues if the docker image produces a file, so we create a blank file first. \ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e36cc86..f2d7426 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,5 +1,7 @@
.. mdinclude:: readme.md
+Read the :ref:`quickstart`.
+
Setting up
----------
@@ -26,6 +28,8 @@ Please note that the ``-o`` flag is required for rendering to PDFs.
``assessments.py`` contains many useful arguments for interacting with the database:
+Also see :ref:`assessments`
+
.. argparse::
:module: assessments
:func: getparser
@@ -37,11 +41,13 @@ Please note that the ``-o`` flag is required for rendering to PDFs.
reflect.rst
database.rst
+ assessments.rst
.. toctree::
:maxdepth: 2
:caption: Other Pages:
+ quickstart.rst
configfile.rst
docker.rst
assessmentyaml.rst
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
new file mode 100644
index 0000000..08f3cec
--- /dev/null
+++ b/docs/source/quickstart.rst
@@ -0,0 +1,97 @@
+.. _quickstart:
+
+Quick start guide
+=================
+
+This guide implements a simple assessment to make a *greatest common divisor* function.
+
+First make an assessment yaml file:
+
+.. literalinclude:: _static/QuickStart/simple_assessment.yml
+ :linenos:
+ :language: yaml
+
+This expects a single function called ``gcd()`` in a file called ``euclid.py`` with no fewer
+than two arguments. It expects it to print ``4`` to stdout when executed. It also runs pytest
+on the function.
+
+Then add it to the database:
+
+.. code-block:: bash
+
+ docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -c /tmp/assessment.yml
+
+If using Windows, I recommend using the MinGW shell, since PowerShell is bad at dealing with relative file paths in Docker.
+
+Then add some students:
+
+.. code-block:: bash
+
+ docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "1,Alice,a.bar@uea.ac.uk"
+ docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "2,Bob,b.bar@uea.ac.uk"
+ docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "3,Christina,c.bar@uea.ac.uk"
+ docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "4,Dan,d.bar@uea.ac.uk"
+
+Now we are ready to make some reports! The submissions are zip files with the student's ID as the name. First, let's just use the default parameters:
+
+.. code-block:: bash
+
+ docker run -v "$(pwd)/docs/source/_static/QuickStart/1.zip":/tmp/1.zip -e submission=/tmp/1.zip -e assessment=simple_assessment --rm smarker
+
+This prints out the result as text to stdout:
+
+.. literalinclude:: _static/simple.txt
+
+Smarker can render to text, markdown, json, yaml and PDF, and produce less information, but for now we'll only use the defaults.
+Do the same for the other three submissions.
+
+We can now generate a plagiarism report. But first, let's look at the actual submitted files. Here's the submission from student 1:
+
+.. literalinclude:: _static/QuickStart/simple_submission_1/euclid.py
+ :linenos:
+ :language: python
+
+Student 2:
+
+.. literalinclude:: _static/QuickStart/simple_submission_2/euclid.py
+ :linenos:
+ :language: python
+
+Student 3:
+
+.. literalinclude:: _static/QuickStart/simple_submission_3/euclid.py
+ :linenos:
+ :language: python
+
+Student 4:
+
+.. literalinclude:: _static/QuickStart/simple_submission_4/euclid.py
+ :linenos:
+ :language: python
+
+From this we can tell that student 2 has copied from student 1 (or the other way around), changing only the header comments.
+Student 3 has also copied from student 1, but has changed the variable names in an attempt to hide it. Submission 4 is completely different.
+
+Now we can generate a plagiarism report:
+
+.. code-block:: bash
+
+ touch out/report.pickle && sudo docker run -v "$(pwd)/out/report.pickle":/Smarker/plagarism_report_details.pickle -it --entrypoint python --rm smarker assessments.py --plagarism_report simple_assessment
+
+Which produces a pickled report matrix, and prints out to stdout:
+
+.. code-block:: text
+
+ 2 3 4 1
+ 2 100.00 100.00 42.86 94.74
+ 3 100.00 100.00 42.86 94.74
+ 4 63.16 63.16 100.00 57.89
+ 1 94.74 94.74 39.29 100.00
+ Written report to /Smarker/plagarism_report_details.pickle
+
+If we run it outside of docker, we can also get it rendered nicely in matplotlib:
+
+.. image:: _static/readme_matrix.png
+
+The matrix isn't symmetrical, which is intentional, since it considers the difference in complexity between submissions. This can be useful for
+finding the culprit in copying. \ No newline at end of file
diff --git a/docs/source/readme.md b/docs/source/readme.md
index 2914835..3b61499 100644
--- a/docs/source/readme.md
+++ b/docs/source/readme.md
@@ -32,3 +32,7 @@ File with an exception
![pytest](https://smarker.eda.gay/_static/readme_pytest.png)
Using pytest
+
+![matrix](https://smarker.eda.gay/_static/readme_matrix.png)
+
+Plagiarism and collusion detection matrix
diff --git a/docs/source/reflect.rst b/docs/source/reflect.rst
index c059206..6c0767a 100644
--- a/docs/source/reflect.rst
+++ b/docs/source/reflect.rst
@@ -1,5 +1,24 @@
``reflect.py``: Getting information about code
==============================================
-.. automodule:: reflect
- :members: \ No newline at end of file
+Classes
+*******
+
+.. autoclass:: reflect.Reflect
+ :members:
+
+.. autoexception:: reflect.MonitoredFileNotInProducedFilesException
+
+Thrown if the user has tried to monitor a file that isn't in the list of produced files in the :ref:`assessmentyaml`.
+
+Functions
+*********
+
+.. autofunction:: reflect.gen_reflection_report
+
+Generates a JSON report. It is quite a complex structure, but it is designed so that users can add other rendering templates
+later on. For example, the report for the submission in the :ref:`quickstart` looks like this:
+
+.. literalinclude:: _static/simple.json
+ :linenos:
+ :language: yaml \ No newline at end of file