diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| l--------- | README.md | 4 | ||||
| -rw-r--r-- | Smarker/assessments.py | 101 | ||||
| -rw-r--r-- | Smarker/requirements.txt | 3 | ||||
| -rw-r--r-- | docs/source/_static/QuickStart/simple_assessment.yml | 12 | ||||
| -rw-r--r-- | docs/source/_static/QuickStart/simple_submission_1/euclid.py | 22 | ||||
| -rw-r--r-- | docs/source/_static/QuickStart/simple_submission_2/euclid.py | 10 | ||||
| -rw-r--r-- | docs/source/_static/QuickStart/simple_submission_3/euclid.py | 11 | ||||
| -rw-r--r-- | docs/source/_static/QuickStart/simple_submission_4/euclid.py | 16 | ||||
| -rw-r--r-- | docs/source/_static/readme_matrix.png | bin | 0 -> 17528 bytes | |||
| -rw-r--r-- | docs/source/_static/report.txt | 9 | ||||
| -rw-r--r-- | docs/source/_static/simple.json | 60 | ||||
| -rw-r--r-- | docs/source/_static/simple.txt | 89 | ||||
| -rw-r--r-- | docs/source/assessments.rst | 24 | ||||
| -rw-r--r-- | docs/source/docker.rst | 8 | ||||
| -rw-r--r-- | docs/source/index.rst | 6 | ||||
| -rw-r--r-- | docs/source/quickstart.rst | 97 | ||||
| -rw-r--r-- | docs/source/readme.md | 4 | ||||
| -rw-r--r-- | docs/source/reflect.rst | 23 | 
19 files changed, 478 insertions, 22 deletions
| @@ -3,6 +3,7 @@ out/  *.zip  smarker.conf  *.aux +*.pickle  # Byte-compiled / optimized / DLL files  __pycache__/ @@ -32,3 +32,7 @@ File with an exception    Using pytest + + + +Plagarism and collusion detection matrix diff --git a/Smarker/assessments.py b/Smarker/assessments.py index b8eb6f0..cdcdcad 100644 --- a/Smarker/assessments.py +++ b/Smarker/assessments.py @@ -1,8 +1,12 @@  from dataclasses import dataclass +from matplotlib import pyplot as plt +import numpy as np  import misc_classes  import configparser  import jinja_helpers  import pycode_similar +import pandas as pd +import pickle  import subprocess  import operator  import database @@ -15,6 +19,11 @@ import re  @dataclass  class SimilarityMetric: +    """Abstract class for getting a metric of similariry between two python objects. +    By default it uses pycode_similar as a metric, but this can be changed by overriding +    ``get_similarity()``. There is also the additional attribute ``details`` for getting +    a breakdown of similarity. +    """      code_text_1:str      code_text_2:str      id_1:int @@ -37,10 +46,60 @@ class SimilarityMetric:                  self.details += line.decode()      def get_similarity(self): +        """Gets the similarity between the two codes. + +        Returns: +            float: A percentage similarity metric +        """          return float(re.findall(r"\d+\.\d+\s", self.details)[0]) +def visualise_matrix(dataframe:pd.DataFrame, file_name): +    """Visualize and draw a similarity matrix. Simply shows the figure, +    therefore this doesn't work in docker. + +    Args: +        dataframe (pandas.DataFrame): Pandas dataframe representing the similarity +        file_name (str): The file name that corrisponds to the dataframe. 
Used as the title +    """ +    print(file_name) +    print(dataframe) + +    values = dataframe.values + +    fig, ax = plt.subplots() +    ax.matshow(values, alpha = 0.3, cmap = plt.cm.Reds) + +    # axes labels +    xaxis = np.arange(len(dataframe.columns)) +    ax.set_xticks(xaxis) +    ax.set_yticks(xaxis) +    ax.set_xticklabels(dataframe.columns) +    ax.set_yticklabels(dataframe.index) + +    # labelling each point +    for i in range(values.shape[0]): +        for j in range(values.shape[1]): +            if i == j: +                ax.text(x = j, y = i, s = "N/A", va = 'center', ha = 'center') +            else: +                ax.text(x = j, y = i, s = values[i, j], va = 'center', ha = 'center') +     +    plt.title(file_name) +    plt.show() + -def generate_plagarism_report(assessment_name, db): +def generate_plagarism_report(assessment_name, db:database.SmarkerDatabase): +    """Generates a plagarism report for the given ``assessment_name``. Only +    fetches submissions with present files and without any exceptions. 
+ +    Args: +        assessment_name (str): The name of the assessment to fetch submissions from +        db (database.SmarkerDatabase): An open database object is required + +    Returns: +        dict: dict of ``pandas.core.frame.DataFrame`` objects indexed by the required file name +    """ +    # get submissions with files and no exception      required_files = db.get_assessments_required_files(assessment_name)      submission_ids_to_get = set()      assessments = db.get_submissions(assessment_name) @@ -56,27 +115,31 @@ def generate_plagarism_report(assessment_name, db):              un_added_student_nos.remove(id_) +    # get similarity matrix +    report = {}      codes = db.get_submission_codes(submission_ids_to_get)      for file_name, submissions in codes.items(): +        d = {} +        d_details = {}          with tempfile.TemporaryDirectory() as td: -            print(file_name, len(submissions))              for student_id, code in submissions: -                with open(os.path.join(td, "%i.py" % student_id), "w") as f: -                    f.write(code) - -            cmd = ["pycode_similar"] + [os.path.join(td, f) for f in os.listdir(td)] -            print(" ".join(cmd)) -            proc = subprocess.Popen(cmd, stdout = subprocess.PIPE) -            stdout = "" -            while True: -                line = proc.stdout.readline() -                if not line: -                    break -                stdout += line.decode() - -            print(stdout) -            input("skfhsk") -                     +                d[student_id] = [] +                d_details[student_id] = [] +                for student_id_2, code_2 in submissions: +                    sm = SimilarityMetric(code, code_2, student_id, student_id_2) +                    # print("%i and %i = %.3f" % (student_id, student_id_2, SimilarityMetric(code, code_2, student_id, student_id_2).get_similarity())) +                    d[student_id].append(sm.get_similarity()) +          
          d_details[student_id].append(sm) +        index = [i[0] for i in submissions] +        visualise_matrix(pd.DataFrame(d, index = index), file_name) +        report[file_name] = pd.DataFrame(d_details, index = index) + +    out_path = os.path.realpath("plagarism_report_details.pickle") +    with open(out_path, "wb") as f: +        pickle.dump(report, f) +    print("Written report to %s" % out_path) + +    return report  def getparser():      config = configparser.ConfigParser() @@ -116,7 +179,7 @@ def getparser():          "-s", "--create_student",          action = misc_classes.EnvDefault,          envvar = "create_student", -        help = "Add a student in the form e.g. 123456789,Eden,Attenborough,E.Attenborough@uea.ac.uk", +        help = "Add a student in the form e.g. 123456789,Eden Attenborough,E.Attenborough@uea.ac.uk",          required = False      )      parser.add_argument( diff --git a/Smarker/requirements.txt b/Smarker/requirements.txt index a8fef17..3be9c36 100644 --- a/Smarker/requirements.txt +++ b/Smarker/requirements.txt @@ -10,3 +10,6 @@ pdfkit  lxml
  pymysql
  pycode_similar
 +pandas
 +matplotlib
 +numpy
 diff --git a/docs/source/_static/QuickStart/simple_assessment.yml b/docs/source/_static/QuickStart/simple_assessment.yml new file mode 100644 index 0000000..414f00b --- /dev/null +++ b/docs/source/_static/QuickStart/simple_assessment.yml @@ -0,0 +1,12 @@ +name: simple_assessment +files: +    - euclid.py: +        functions: +            - gcd(2) +        tests: +            - | +                assert euclid.gcd(8,12) == 4 +        run: +            - python euclid.py: +                regexes: +                    - ^4 diff --git a/docs/source/_static/QuickStart/simple_submission_1/euclid.py b/docs/source/_static/QuickStart/simple_submission_1/euclid.py new file mode 100644 index 0000000..f72707a --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_1/euclid.py @@ -0,0 +1,22 @@ +# the newest! +# assessment 1 + +def gcd(m,n) -> int: +    """Calculates the greatest common denominator between two numbers. + +    Args: +        x (int): Number One +        y (int): Number Two + +    Returns: +        int: The GCD of the two numbers +    """ +    if m< n: +        (m,n) = (n,m) +    if(m%n) == 0: +        return n +    else: +        return (gcd(n, m % n)) # recursion taking place + +# gcd +print(gcd(8,12)) diff --git a/docs/source/_static/QuickStart/simple_submission_2/euclid.py b/docs/source/_static/QuickStart/simple_submission_2/euclid.py new file mode 100644 index 0000000..0819bc5 --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_2/euclid.py @@ -0,0 +1,10 @@ +def gcd(m,n): +    if m< n: +        (m,n) = (n,m) +    if(m%n) == 0: +        return n +    else: +        return (gcd(n, m % n)) # recursion taking place + +# calling function with parameters and printing it out +print(gcd(8,12)) diff --git a/docs/source/_static/QuickStart/simple_submission_3/euclid.py b/docs/source/_static/QuickStart/simple_submission_3/euclid.py new file mode 100644 index 0000000..73e7d9c --- /dev/null +++ 
b/docs/source/_static/QuickStart/simple_submission_3/euclid.py @@ -0,0 +1,11 @@ +def gcd(p,q): +    """Docstring gcd""" +    if p < q: +        (p,q) = (q,p) +    if(p%q) == 0: +        return q +    else: +        return (gcd(q, p % q)) # recursion taking place + +# calling function with parameters and printing it out +print(gcd(8,12)) diff --git a/docs/source/_static/QuickStart/simple_submission_4/euclid.py b/docs/source/_static/QuickStart/simple_submission_4/euclid.py new file mode 100644 index 0000000..064d1e5 --- /dev/null +++ b/docs/source/_static/QuickStart/simple_submission_4/euclid.py @@ -0,0 +1,16 @@ +# assessment A +# student id: 4 + +def gcd(x,y): +    if x > y: +        small = y +    else: +        small = x +    for i in range(1, small+1): +        if((x % i == 0) and (y % i == 0)): +            g = i +               +    return g + +# calling function with parameters and printing it out +print(gcd(8,12)) diff --git a/docs/source/_static/readme_matrix.png b/docs/source/_static/readme_matrix.pngBinary files differ new file mode 100644 index 0000000..e91358b --- /dev/null +++ b/docs/source/_static/readme_matrix.png diff --git a/docs/source/_static/report.txt b/docs/source/_static/report.txt new file mode 100644 index 0000000..b78e20b --- /dev/null +++ b/docs/source/_static/report.txt @@ -0,0 +1,9 @@ +euclid.py
 +        2  ...       1
 +2  100.00  ...   94.74
 +3  100.00  ...   94.74
 +4   63.16  ...   57.89
 +1   94.74  ...  100.00
 +
 +[4 rows x 4 columns]
 +Written report to /Smarker/plagarism_report_details.pickle
 diff --git a/docs/source/_static/simple.json b/docs/source/_static/simple.json new file mode 100644 index 0000000..40accc7 --- /dev/null +++ b/docs/source/_static/simple.json @@ -0,0 +1,60 @@ +{ +    "files": [ +        { +            "euclid.py": { +                "functions": [ +                    { +                        "gcd(2)": { +                            "present": true, +                            "documentation": { +                                "comments": "None", +                                "doc": "Docstring gcd" +                            }, +                            "arguments": "(p, q)", +                            "minimum_arguments": 2, +                            "source_code": "def gcd(p,q):\n    \"\"\"Docstring gcd\"\"\"\n    if p < q:\n        (p,q) = (q,p)\n    if(p%q) == 0:\n        return q\n    else:\n        return (gcd(q, p % q)) # recursion taking place" +                        } +                    } +                ], +                "run": [ +                    { +                        "python euclid.py": { +                            "regexes": { +                                "^4": [ +                                    "4" +                                ] +                            }, +                            "full_output": "4\n" +                        } +                    } +                ], +                "tests": [ +                    "assert euclid.gcd(8,12) == 4\n" +                ], +                "present": true, +                "has_exception": false, +                "documentation": { +                    "comments": "None", +                    "doc": "None" +                } +            } +        } +    ], +    "name": "simple_assessment", +    "student_no": "123456790", +    "test_results": { +        "pytest_report": "============================= test session starts ==============================\nplatform linux -- Python 3.10.4, pytest-7.1.1, pluggy-1.0.0 -- 
/usr/bin/python3\ncachedir: .pytest_cache\nrootdir: /tmp/tmpjzy020i4/simple_submission_3\ncollecting ... collected 1 item\n\n../../../../../../tmp/tmpjzy020i4/simple_submission_3/test_euclid.py::test_1 PASSED [100%]\n\n--------------- generated xml file: /tmp/tmpyu0qypji/report.xml ----------------\n============================== 1 passed in 0.01s ===============================\n", +        "junitxml": "<?xml version=\"1.0\" encoding=\"utf-8\"?><testsuites><testsuite name=\"pytest\" errors=\"0\" failures=\"0\" skipped=\"0\" tests=\"1\" time=\"0.019\" timestamp=\"2022-05-01T15:03:57.143881\" hostname=\"thonkpad2\"><testcase classname=\"test_euclid\" name=\"test_1\" time=\"0.001\" /></testsuite></testsuites>", +        "meta": { +            "name": "pytest", +            "errors": "0", +            "failures": "0", +            "skipped": "0", +            "tests": "1", +            "time": "0.019", +            "timestamp": "2022-05-01T15:03:57.143881", +            "hostname": "thonkpad2" +        } +    }, +    "class_tree": {} +} diff --git a/docs/source/_static/simple.txt b/docs/source/_static/simple.txt new file mode 100644 index 0000000..b6dcc16 --- /dev/null +++ b/docs/source/_static/simple.txt @@ -0,0 +1,89 @@ +============================= test session starts ============================== +platform linux -- Python 3.10.4, pytest-7.1.2, pluggy-1.0.0 -- /usr/bin/python3 +cachedir: .pytest_cache +rootdir: /tmp/tmp398_c3x6/simple_submission_1 +collecting ... 
collected 1 item + +../tmp/tmp398_c3x6/simple_submission_1/test_euclid.py::test_1 PASSED     [100%] + +--------------- generated xml file: /tmp/tmpceag5_nn/report.xml ---------------- +============================== 1 passed in 0.01s =============================== +4 +=== simple_assessment - Student ID: 1 Automatic marking report === +Report generated at 2022-05-01 15:49:15.701124 + +== Class Tree: == + +{} + + +== File Analysis == + +    = euclid.py = +        Documentation: +            28 characters long +            Comments: +                ``` +                # the newest! +                # assessment 1 +                ``` +            Docstring: +                *** No docstring present *** +        Functions: +            gcd(2): +                Arguments: +                    (m, n) -> int +                    Enough? YES +                Documentation: +                    164 characters long +                    Comments: +                        *** No comments present *** +                    Docstring: +            ``` +            Calculates the greatest common denominator between two numbers. + +            Args: +                x (int): Number One +                y (int): Number Two + +            Returns: +                int: The GCD of the two numbers +            ``` +                Source: +                    15 lines (356 characters) +                    Code: +            ``` +            def gcd(m,n) -> int: +                """Calculates the greatest common denominator between two numbers. 
+ +                Args: +                    x (int): Number One +                    y (int): Number Two + +                Returns: +                    int: The GCD of the two numbers +                """ +                if m< n: +                    (m,n) = (n,m) +                if(m%n) == 0: +                    return n +                else: +                    return (gcd(n, m % n)) # recursion taking place +            ``` +        Runtime Analysis: +            Command `python euclid.py`: +                Monitor: +                    stdout +                Regexes: +                    `^4`: +                        Found occurrences: 1 +                        Occurrences list: +                            4 +                Full runtime output: +                ``` +                    4 +     +                ``` + + + diff --git a/docs/source/assessments.rst b/docs/source/assessments.rst new file mode 100644 index 0000000..a8d7311 --- /dev/null +++ b/docs/source/assessments.rst @@ -0,0 +1,24 @@ +.. _assessments: + +``assessments.py`` +================== + +``assessments.py`` contains many useful arguments for interacting with the database: + +.. argparse:: +   :module: assessments +   :func: getparser +   :prog: python Smarker/assessments.py + +Classes +******* + +.. autoclass:: assessments.SimilarityMetric +    :members: + +Functions +********* + +.. autofunction:: assessments.visualise_matrix + +.. autofunction:: assessments.generate_plagarism_report
\ No newline at end of file diff --git a/docs/source/docker.rst b/docs/source/docker.rst index 7c3237a..232c7f4 100644 --- a/docs/source/docker.rst +++ b/docs/source/docker.rst @@ -41,4 +41,10 @@ To list assessments in the database using docker:  .. code-block:: bash -    sudo docker run -it --entrypoint python --rm smarker assessments.py --list yes
\ No newline at end of file +    sudo docker run -it --entrypoint python --rm smarker assessments.py --list yes + +.. code-block:: bash + +    touch out/report.pickle && sudo docker run -v "$(pwd)/out/report.pickle":/Smarker/plagarism_report_details.pickle -it --entrypoint python --rm smarker assessments.py --plagarism_report example + +If a file doesn't exist before it's mounted as a volume in docker, docker creates it automatically as a *directory*. This causes issues when the docker image needs to produce a file at that path, so we create a blank file first.
\ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index e36cc86..f2d7426 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,7 @@  .. mdinclude:: readme.md +Read the :ref:`quickstart`. +  Setting up  ---------- @@ -26,6 +28,8 @@ Please note that the ``-o`` flag is required for rendering to PDFs.  ``assessments.py`` contains many useful arguments for interacting with the database: +Also see :ref:`assessments` +  .. argparse::     :module: assessments     :func: getparser @@ -37,11 +41,13 @@ Please note that the ``-o`` flag is required for rendering to PDFs.     reflect.rst     database.rst +   assessments.rst  .. toctree::     :maxdepth: 2     :caption: Other Pages: +   quickstart.rst     configfile.rst     docker.rst     assessmentyaml.rst diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst new file mode 100644 index 0000000..08f3cec --- /dev/null +++ b/docs/source/quickstart.rst @@ -0,0 +1,97 @@ +.. _quickstart: + +Quick start guide +================= + +This guide implements a simple assessment to make a *greatest common divisor* function. + +First make an assessment yaml file: + +.. literalinclude:: _static/QuickStart/simple_assessment.yml +    :linenos: +    :language: yaml + +This expects a file called ``euclid.py`` containing a single function called ``gcd()`` that takes no fewer +than two arguments. It expects it to print ``4`` to stdout when executed. It also runs pytest +on the function. + +Then add it to the database: + +.. code-block:: bash + +    docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -c /tmp/assessment.yml + +If using Windows, I recommend using the MinGW shell, since PowerShell is bad at dealing with relative file paths in docker. + +Then add some students: + +.. 
code-block:: bash + +    docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "1,Alice,a.bar@uea.ac.uk" +    docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "2,Bob,b.bar@uea.ac.uk" +    docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "3,Christina,c.bar@uea.ac.uk" +    docker run -v "$(pwd)/docs/source/_static/QuickStart/simple_assessment.yml":/tmp/assessment.yml -it --entrypoint python --rm smarker assessments.py -s "4,Dan,d.bar@uea.ac.uk" + +Now we are ready to make some reports! The submissions are zip files with the student's id as the name. First, let's just use the default parameters: + +.. code-block:: bash + +    docker run -v "$(pwd)/docs/source/_static/QuickStart/1.zip":/tmp/1.zip -e submission=/tmp/1.zip -e assessment=simple_assessment --rm smarker + +This prints out the result as text to stdout: + +.. literalinclude:: _static/simple.txt + +Smarker can render to text, markdown, json, yaml and PDF, and can produce less detailed reports, but for now we'll only use the defaults.  +Do the same for the other three submissions. + +We can now generate a plagiarism report. But first, let's look at the actual submitted files. Here's the submission from student 1: + +.. literalinclude:: _static/QuickStart/simple_submission_1/euclid.py +    :linenos: +    :language: python + +Student 2: + +.. literalinclude:: _static/QuickStart/simple_submission_2/euclid.py +    :linenos: +    :language: python + +Student 3: + +.. literalinclude:: _static/QuickStart/simple_submission_3/euclid.py +    :linenos: +    :language: python + +Student 4: + +.. 
literalinclude:: _static/QuickStart/simple_submission_4/euclid.py +    :linenos: +    :language: python + +From this we can tell that student 2 has copied from student 1 (or the other way around), changing only the comments, docstring and return annotation. +Student 3 has also copied from student 1, but has changed the variable names in an attempt to hide it. Submission 4 is completely different. + +Now we can generate a plagiarism report: + +.. code-block:: bash + +    touch out/report.pickle && sudo docker run -v "$(pwd)/out/report.pickle":/Smarker/plagarism_report_details.pickle -it --entrypoint python --rm smarker assessments.py --plagarism_report simple_assessment + +This produces a pickled report matrix, and prints the following to stdout: + +.. code-block:: text + +            2       3       4       1 +    2  100.00  100.00   42.86   94.74 +    3  100.00  100.00   42.86   94.74 +    4   63.16   63.16  100.00   57.89 +    1   94.74   94.74   39.29  100.00 +    Written report to /Smarker/plagarism_report_details.pickle + +If we run it outside of docker, we can also get it rendered nicely in matplotlib: + +.. image:: _static/readme_matrix.png + +The matrix isn't symmetrical, which is intentional, since it considers the difference in complexity between submissions. This can be useful for +finding the culprit in copying.
\ No newline at end of file diff --git a/docs/source/readme.md b/docs/source/readme.md index 2914835..3b61499 100644 --- a/docs/source/readme.md +++ b/docs/source/readme.md @@ -32,3 +32,7 @@ File with an exception    Using pytest + + + +Plagarism and collusion detection matrix diff --git a/docs/source/reflect.rst b/docs/source/reflect.rst index c059206..6c0767a 100644 --- a/docs/source/reflect.rst +++ b/docs/source/reflect.rst @@ -1,5 +1,24 @@  ``reflect.py``: Getting information about code  ============================================== -.. automodule:: reflect -    :members:
\ No newline at end of file +Classes +******* + +.. autoclass:: reflect.Reflect +    :members: + +.. autoexception::  reflect.MonitoredFileNotInProducedFilesException + +Thrown if the user has tried to monitor a file that isn't in the list of produced files in the :ref:`assessmentyaml`. + +Functions +********* + +.. autofunction:: reflect.gen_reflection_report + +Generates a JSON report. The structure is quite complex, but it is designed so that users can add other rendering templates  +later on. For example, the report for the :ref:`quickstart` looks like this: + +.. literalinclude:: _static/simple.json +    :linenos: +    :language: yaml
\ No newline at end of file | 
