Serge Bazanski | 2f712bf | 2021-07-10 16:53:41 +0000 | [diff] [blame] | 1 | # Script to attempt to clean up our owncloud database (b/32) after The Postgres |
| 2 | # Fuckup (b/30). |
| 3 | # |
| 4 | # Think of it as a one-shot fsck, documented in the form of the code that q3k@ |
| 5 | # used to recover from this kerfuffle. |
| 6 | # |
| 7 | # SECURITY: It's full of manual SQL query crafting without parametrization. |
| 8 | # Don't attempt to use it for anything else other than this one-shot usecase. |
| 9 | # |
| 10 | # You will need to tunnel to the postgreses running on Boston: |
| 11 | # $ ssh \ |
| 12 | # -L15432:127.0.0.1:5432 \ |
| 13 | # -L15433:127.0.0.1:5433 \ |
| 14 | # hackerspace.pl |
| 15 | |
| 16 | from datetime import datetime |
| 17 | import os |
| 18 | |
| 19 | import psycopg2 |
| 20 | |
| 21 | |
# Incident window: only rows created between these two epoch timestamps could
# have been created on PG12 and lost in the rollback to PG9.
incident_start = 1611529200  # when pg12 started to run
incident_end = 1611788400  # when we rolled back to pg9


# Get it from boston, /var/www/owncloud/config/config.php.
# Default to "" so a missing variable raises our own descriptive Exception
# below instead of crashing with AttributeError on None.strip().
OWNCLOUD_PASSWORD = os.environ.get("OWNCLOUD_PASSWORD", "").strip()
if not OWNCLOUD_PASSWORD:
    raise Exception("OWNCLOUD_PASSWORD must be set to owncloud postgres password")


# Tunneled connections (see header comment): local port 15432 is the PG9
# (rolled-back, live) database, 15433 is the PG12 (incident-era) database.
conn9 = psycopg2.connect(host="localhost", port=15432, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
conn12 = psycopg2.connect(host="localhost", port=15433, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
| 34 | |
| 35 | |
def idset(conn, table, keyname="id"):
    """Return the set of values in column *keyname* of table oc_<table>.

    Args:
        conn: open psycopg2 connection.
        table: table name without the "oc_" prefix (trusted, interpolated).
        keyname: column to collect (trusted, interpolated).

    Returns:
        Set of the first (only) column of every row.
    """
    # SECURITY: table/keyname are interpolated into SQL — callers pass only
    # hardcoded identifiers (see file header).
    cur = conn.cursor()
    try:
        cur.execute(f"SELECT {keyname} FROM oc_{table}")
        rows = cur.fetchall()
    finally:
        # Close the cursor even if the query fails, instead of leaking it.
        cur.close()
    return {row[0] for row in rows}
| 43 | |
| 44 | |
def valset(conn, table, keys):
    """Return a set of concatenated values for the given keys in a table, one per row.

    Each row of oc_<table> becomes a single ';;;'-joined string of the
    stringified values of *keys* (trusted identifiers, interpolated into SQL).
    """
    cur = conn.cursor()
    cur.execute(f"SELECT {', '.join(keys)} FROM oc_{table}")
    rows = cur.fetchall()
    cur.close()
    return {';;;'.join(str(value) for value in row) for row in rows}
| 54 | |
| 55 | |
# Check accounts difference.
#
# Any account UID present in PG12 but absent from PG9 would be an account
# created during the incident window and lost in the rollback; the assert
# makes the script abort loudly if any such account exists.
#
# RESULT: Thankfully, no accounts have been accidentally rolled back.
accounts12 = idset(conn12, "accounts", keyname="uid")
accounts9 = idset(conn9, "accounts", keyname="uid")
print("Accounts missing in 9:", accounts12 - accounts9)
assert (accounts12 - accounts9) == set()
| 63 | |
| 64 | |
def account_by_uid(conn, uid):
    """Return the SSO UID for a given Owncloud UID.

    Looks up the LDAP DN mapped to the Owncloud account and extracts the
    value of the leading uid= RDN. Raises TypeError if no mapping row
    exists (fetchone() returns None) and AssertionError if the DN does not
    start with 'uid='.
    """
    cur = conn.cursor()
    # Parameterized query instead of f-string interpolation: uid originates
    # from database contents, so keep it out of crafted SQL.
    cur.execute("SELECT ldap_dn FROM oc_ldap_user_mapping WHERE owncloud_name = %s", (uid,))
    dn, = cur.fetchone()
    cur.close()
    # DN looks like 'uid=<sso-uid>,ou=...,dc=...': take the first RDN.
    part = dn.split(',')[0]
    assert part.startswith('uid=')
    return part[4:]
| 74 | |
| 75 | |
def storage_owner_by_id(conn, id_):
    """Return the SSO UID owning the storage with the given numeric ID.

    Special cases: the shared S3 object store maps to "S3", and the
    'gallery' pseudo-user maps to "GALLERY". All other storages are
    expected to be per-user object stores named 'object::user:<uid>'.
    """
    cur = conn.cursor()
    # Parameterized query instead of f-string interpolation; psycopg2 adapts
    # the value to a quoted literal, matching the original query's shape.
    cur.execute("SELECT id FROM oc_storages WHERE numeric_id = %s", (id_,))
    oid, = cur.fetchone()
    cur.close()
    if oid == 'object::store:amazon::nextcloud':
        return "S3"
    assert oid.startswith('object::user:')
    # Strip the 'object::user:' prefix (13 chars) to get the Owncloud UID.
    userid = oid[13:]
    assert len(userid) > 0
    if userid == "gallery":
        return "GALLERY"
    return account_by_uid(conn, userid)
| 90 | |
| 91 | |
# Check shares table. This table contains the intent of sharing some file with someone else.
#
# For every share id present in PG12 but not PG9, inspect it: shares created
# outside the incident window were deliberately deleted after the rollback and
# can be skipped; anything created *during* the window would be genuinely lost
# and aborts the script for manual handling.
#
# RESULT: we only have things that have been removed after rollback to PG9,
# nothing was created in PG12 and lost.
shareids12 = idset(conn12, "share")
shareids9 = idset(conn9, "share")
print("Shares missing in 9:", len(shareids12 - shareids9))
cur12 = conn12.cursor()
for id_ in list(shareids12-shareids9):
    # id_ is a numeric primary key from idset(), interpolated without quoting.
    cur12.execute(f"SELECT uid_owner, file_target, stime, share_with FROM oc_share WHERE id = {id_}")
    uid_owner, file_target, stime, share_with = cur12.fetchone()
    account = account_by_uid(conn12, uid_owner)
    # NOTE(review): utcfromtimestamp assumes stime is a UTC epoch — confirm.
    stime_human = datetime.utcfromtimestamp(stime).strftime('%Y-%m-%d %H:%M:%S')
    print(f"Missing share {id_} {file_target} owned by {account}..")
    if stime < incident_start or stime > incident_end:
        # Created outside the incident window: removed on purpose post-rollback.
        print(f" Skipping, created at {stime_human}")
        continue
    # Created during the window: needs a human decision, abort loudly.
    raise Exception("Unhandled.")
cur12.close()
| 111 | |
| 112 | |
# Check mounts table. This contains root file storages for each user, but also
# incoming shares 'mounted' into a user's account.
# From what I can tell, storage_id/root_id are the source path that's being
# mounted (root_id being the fileid inside an oc_filecache, and storage_id
# being the storage in which that file is kept), while user_id/mount_point are
# the mount destination (ie. path into which this is mounted for a user's
# view).
#
# RESULT: we only have share-mounts missing for a handful of users. We choose
# to ignore it, as we assume next time these users log in they will get the
# mounts again.
# TODO(q3k): verify this
mounts12 = valset(conn12, "mounts", ["storage_id", "root_id", "user_id", "mount_point"])
mounts9 = valset(conn9, "mounts", ["storage_id", "root_id", "user_id", "mount_point"])
print("Mounts missing in 9:", len(mounts12 - mounts9))
# Mounts that appearify normally whenever you log into owncloud, as they are the result of shares':
mount_names_ok = set(["2020-03-26_covid_templar", "camera", "Public Shaming", "przylbice.md", "Test.txt", "covid"])
# Mounts that used to be from a share that existed, but has been since deleted in PG9.
mount_names_ok |= set(["Covid-instrukcje", "Chaos_modele_covid", "Covid_proces_presspack"])
# Re-split the ';;;'-joined valset entries back into tuples and sort by
# user_id so output is grouped per user.
mounts_sorted = []
for m in list(mounts12 - mounts9):
    storage_id, root_id, user_id, mount_point = m.split(';;;')
    mounts_sorted.append((storage_id, root_id, user_id, mount_point))
mounts_sorted = sorted(mounts_sorted, key=lambda el: el[2])
for storage_id, root_id, user_id, mount_point in mounts_sorted:
    # Mount points are rooted at /<user_id>/; strip that prefix before matching.
    assert mount_point.startswith("/" + user_id + "/")
    mount_point = mount_point[len(user_id)+1:]
    account = account_by_uid(conn12, user_id)
    print(f"Missing mount {mount_point}, storage ID {storage_id}, owned by {account}..")
    storage_owner = storage_owner_by_id(conn12, storage_id)
    print(f" Storage owner: {storage_owner}")

    # A known-okay missing mount looks like /files/<name>/ where <name> is one
    # of the share names whitelisted above.
    parts = mount_point.split('/')
    if len(parts) == 4 and parts[0] == '' and parts[1] == 'files' and parts[2] in mount_names_ok and parts[3] == '':
        print(" Skipping, known okay")
        continue
    # Anything else needs manual investigation; abort loudly instead of guessing.
    raise Exception("Unhandled")
| 150 | |