diff options
Diffstat (limited to 'Smarker')
| -rw-r--r-- | Smarker/assessments.py | 80 | ||||
| -rw-r--r-- | Smarker/database.py | 46 | ||||
| -rw-r--r-- | Smarker/temp.py | 16 | 
3 files changed, 107 insertions, 35 deletions
| diff --git a/Smarker/assessments.py b/Smarker/assessments.py index 4f32353..b8eb6f0 100644 --- a/Smarker/assessments.py +++ b/Smarker/assessments.py @@ -1,28 +1,82 @@ +from dataclasses import dataclass  import misc_classes  import configparser  import jinja_helpers  import pycode_similar +import subprocess  import operator  import database  import argparse  import tempfile  import yaml +import json  import os +import re -def generate_plagarism_report(codes): -    for file_name, codes in codes.items(): +@dataclass +class SimilarityMetric: +    code_text_1:str +    code_text_2:str +    id_1:int +    id_2:int + +    def __post_init__(self): +        with tempfile.TemporaryDirectory() as td: +            with open(os.path.join(td, "%i.py" % self.id_1), "w") as f: +                f.write(self.code_text_1) + +            with open(os.path.join(td, "%i.py" % self.id_2), "w") as f: +                f.write(self.code_text_2) + +            proc = subprocess.Popen(["pycode_similar", "-p", "0", os.path.join(td, "%i.py" % self.id_1), os.path.join(td, "%i.py" % self.id_2)], stdout = subprocess.PIPE) +            self.details = "" +            while True: +                line = proc.stdout.readline() +                if not line: +                    break +                self.details += line.decode() + +    def get_similarity(self): +        return float(re.findall(r"\d+\.\d+\s", self.details)[0]) + + +def generate_plagarism_report(assessment_name, db): +    required_files = db.get_assessments_required_files(assessment_name) +    submission_ids_to_get = set() +    assessments = db.get_submissions(assessment_name) +    un_added_student_nos = {i[0] for i in assessments.keys()} +    for id_, dt in sorted(assessments.keys(), key=operator.itemgetter(0, 1), reverse=True): +        if id_ in un_added_student_nos: +            files = jinja_helpers.flatten_struct(assessments[(id_, dt)][0]["files"]) + +            for file_name in required_files: +                if files[file_name]["present"]: +                    if (not files[file_name]["has_exception"]): +                        submission_ids_to_get.add(assessments[(id_, dt)][1]) + +            un_added_student_nos.remove(id_) +     +    codes = db.get_submission_codes(submission_ids_to_get) +    for file_name, submissions in codes.items():          with tempfile.TemporaryDirectory() as td: -            un_added_student_nos = {i[0] for i in codes.keys()} -            # print(un_added_student_nos) -            for k, v in sorted(codes.keys(), key=operator.itemgetter(0, 1), reverse=True): -                if k in un_added_student_nos: -                    with open(os.path.join(td, "%i.py" % k), "w") as f: -                        f.write(codes[(k, v)]) +            print(file_name, len(submissions)) +            for student_id, code in submissions: +                with open(os.path.join(td, "%i.py" % student_id), "w") as f: +                    f.write(code) + +            cmd = ["pycode_similar"] + [os.path.join(td, f) for f in os.listdir(td)] +            print(" ".join(cmd)) +            proc = subprocess.Popen(cmd, stdout = subprocess.PIPE) +            stdout = "" +            while True: +                line = proc.stdout.readline() +                if not line: +                    break +                stdout += line.decode() + +            print(stdout) +            input("skfhsk") -                    # print("Written %s at %s" % (k, v)) -                    un_added_student_nos.remove(k) -            input("%s..." % td) -            print(pycode_similar.detect(os.listdir(td)))  def getparser():      config = configparser.ConfigParser() @@ -120,7 +174,7 @@ if __name__ == "__main__":              print("Added student %s" % name)          if args["plagarism_report"] is not None: -            generate_plagarism_report(db.get_submission_codes(args["plagarism_report"])) +            generate_plagarism_report(args["plagarism_report"], db)          # print(db.get_assessment_yaml("CMP-4009B-2020-A2")) diff --git a/Smarker/database.py b/Smarker/database.py index d6a2ea5..37a44db 100644 --- a/Smarker/database.py +++ b/Smarker/database.py @@ -204,34 +204,44 @@ class SmarkerDatabase:                  ))          self.__connection.commit() -    def get_submission_codes(self, assessment_name): +    def get_submission_codes(self, submission_ids):          out = {}          with self.__connection.cursor() as cursor: -            cursor.execute("SELECT file_id, file_name FROM assessment_file WHERE assessment_name = %s;", (assessment_name, )) -            for file_id, file_name in cursor.fetchall(): -                out[file_name] = {} - +            for submission_id in submission_ids:                  cursor.execute("""                  SELECT                       submitted_files.file_text,  -                    submissions.student_no,  -                    submissions.submission_dt  +                    submitted_files.file_id,  +                    assessment_file.file_name,  +                    submissions.student_no                   FROM submitted_files  +                INNER JOIN assessment_file  +                ON submitted_files.file_id = assessment_file.file_id                   INNER JOIN submissions  -                ON submissions.submission_id = submitted_files.submission_id  -                WHERE submitted_files.file_id = %s; -                """, (file_id, )) - -                for code, student_no, dt in cursor.fetchall(): -                    out[file_name][(int(student_no), dt)] = code +                ON submissions.submission_id = submitted_files.submission_id +                WHERE submitted_files.submission_id = %s; +                """, (submission_id)) +                 +                for file_contents, id_, file_name, student_no in cursor.fetchall(): +                    if file_contents is not None: +                        try: +                            out[file_name].append((int(student_no), file_contents)) +                        except KeyError: +                            out[file_name] = [(int(student_no), file_contents)]          return out -    def get_most_recent_submission_report(self, assessment_name): +    def get_submissions(self, assessment_name):          with self.__connection.cursor() as cursor: -            cursor.execute("SELECT MAX(submission_id), student_no FROM submissions WHERE assessment_name = %s GROUP BY student_no;", (assessment_name, )) -            return [(int(i[0]), int(i[1]), yaml.safe_load(i[2])) for i in cursor.fetchall()] -                 +            cursor.execute("SELECT student_no, submission_dt, report_yaml, submission_id FROM submissions WHERE assessment_name = %s;", (assessment_name, )) +            return {(int(i[0]), i[1]): (yaml.safe_load(i[2]), int(i[3])) for i in cursor.fetchall()} + +    def get_assessments_required_files(self, assessment_name): +        with self.__connection.cursor() as cursor:        +            cursor.execute("SELECT file_name FROM assessment_file WHERE assessment_name = %s;", (assessment_name, ))    +            return [i[0] for i in cursor.fetchall()]  if __name__ == "__main__":      with SmarkerDatabase(host = "vps.eda.gay", user="root", passwd=input("Input password: "), db="Smarker", port=3307) as db: -        print(db.get_most_recent_submission_report("simple_assessment")) +        # print(db.get_assessments_required_files("example")) +        import json +        print(json.dumps(db.get_submission_codes((24, 21)), indent = 4)) diff --git a/Smarker/temp.py b/Smarker/temp.py index 60b1c18..491729f 100644 --- a/Smarker/temp.py +++ b/Smarker/temp.py @@ -1,6 +1,14 @@ -import json +import assessments +import sys +import os -with open("100301654_report.json", "r") as f: -    tree = json.load(f)["class_tree"] +if __name__ == "__main__": +    with open(sys.argv[1], "r") as f: +        ft1 = f.read() -print(tree)
\ No newline at end of file +    with open(sys.argv[2], "r") as f: +        ft2 = f.read() + +    similarityMetric = assessments.SimilarityMetric(ft1, ft2, 1, 2) +    print(similarityMetric.get_similarity()) +    print(similarityMetric.details)
\ No newline at end of file | 
